Commit 22471de0 by 包珍珍 Committed by Enrico Pozzobon

knot

parent 8e94fb3c
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
.macro PUSH_CONFLICT
push r16
push r17
push r18
push r19
push r23
push r24
push r25
push r26
push r27
push r28
push r29
push r30
push r31
.endm
.macro POP_CONFLICT
pop r31
pop r30
pop r29
pop r28
pop r27
pop r26
pop r25
pop r24
pop r23
pop r19
pop r18
pop r17
pop r16
.endm
.macro PUSH_ALL
push r2
push r3
push r4
push r5
push r6
push r7
push r8
push r9
push r10
push r11
push r12
push r13
push r14
push r15
push r16
push r17
push r28
push r29
.endm
.macro POP_ALL
pop r29
pop r28
pop r17
pop r16
pop r15
pop r14
pop r13
pop r12
pop r11
pop r10
pop r9
pop r8
pop r7
pop r6
pop r5
pop r4
pop r3
pop r2
clr r1
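; r1 is avr-gcc's fixed zero register; clearing it restores the compiler's
; assumption (r1 == 0) before execution returns to C code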
.endm
\ No newline at end of file
#ifndef __CONFIG_H__
#define __CONFIG_H__
#define CRYPTO_AEAD
//#define CRYPTO_HASH
#define MAX_MESSAGE_LENGTH 128
#define STATE_INBITS 256
/* For CRYPTO_AEAD */
#define CRYPTO_KEYBITS 128
/* For CRYPTO_HASH */
#define CRYPTO_BITS 256
#define STATE_INBYTES ((STATE_INBITS + 7) / 8)
#define ROW_INBITS ((STATE_INBITS + 3) / 4)
#define ROW_INBYTES ((ROW_INBITS + 7) / 8)
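/*
 * Worked example (a sanity check added for illustration, not in the original
 * header): with STATE_INBITS == 256 these formulas give STATE_INBYTES == 32,
 * ROW_INBITS == 64 and ROW_INBYTES == 8, i.e. the state is four 64-bit rows.
 */
#if (STATE_INBITS == 256) && ((STATE_INBYTES != 32) || (ROW_INBYTES != 8))
#error "unexpected derived state sizes"
#endif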
/* For CRYPTO_AEAD */
#define CRYPTO_KEYBYTES ((CRYPTO_KEYBITS + 7) / 8)
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES CRYPTO_KEYBYTES
#define CRYPTO_ABYTES CRYPTO_KEYBYTES
#define CRYPTO_NOOVERLAP 1
#define MAX_ASSOCIATED_DATA_LENGTH 32
#define MAX_CIPHER_LENGTH (MAX_MESSAGE_LENGTH + CRYPTO_ABYTES)
#define TAG_MATCH 0
#define TAG_UNMATCH -1
#define OTHER_FAILURES -2
/* For CRYPTO_HASH */
#define CRYPTO_BYTES ((CRYPTO_BITS + 7) / 8)
#define DOMAIN_BITS 0x80
#define PAD_BITS 0x01
#define S384_R192_BITS 0x80
#if (STATE_INBITS==256)
#define C1 1
#define C2 8
#define C3 25
#elif (STATE_INBITS==384)
#define C1 1
#define C2 8
#define C3 55
#elif (STATE_INBITS==512)
#define C1 1
#define C2 16
#define C3 25
#else
#error "Unsupported state size"
#endif
#ifdef CRYPTO_AEAD
/* For CRYPTO_AEAD */
#define KEY_INBITS (CRYPTO_KEYBYTES * 8)
#define KEY_INBYTES (CRYPTO_KEYBYTES)
#define NONCE_INBITS (CRYPTO_NPUBBYTES * 8)
#define NONCE_INBYTES (CRYPTO_NPUBBYTES)
#define TAG_INBITS (CRYPTO_ABYTES * 8)
#define TAG_INBYTES (CRYPTO_ABYTES)
#if (KEY_INBITS==128) && (STATE_INBITS==256)
#define RATE_INBITS 64
#define NR_0 52
#define NR_i 28
#define NR_f 32
#elif (KEY_INBITS==128) && (STATE_INBITS==384)
#define RATE_INBITS 192
#define NR_0 76
#define NR_i 28
#define NR_f 32
#elif (KEY_INBITS==192) && (STATE_INBITS==384)
#define RATE_INBITS 96
#define NR_0 76
#define NR_i 40
#define NR_f 44
#elif (KEY_INBITS==256) && (STATE_INBITS==512)
#define RATE_INBITS 128
#define NR_0 100
#define NR_i 52
#define NR_f 56
#else
#error "Unsupported combination of key size and state size"
#endif
#define RATE_INBYTES ((RATE_INBITS + 7) / 8)
#define SQUEEZE_RATE_INBYTES TAG_INBYTES
#endif
#ifdef CRYPTO_HASH
/* For CRYPTO_HASH */
#define HASH_DIGEST_INBITS (CRYPTO_BYTES * 8)
#if (HASH_DIGEST_INBITS==256) && (STATE_INBITS==256)
#define HASH_RATE_INBITS 32
#define HASH_SQUEEZE_RATE_INBITS 128
#define NR_h 68
#elif (HASH_DIGEST_INBITS==256) && (STATE_INBITS==384)
#define HASH_RATE_INBITS 128
#define HASH_SQUEEZE_RATE_INBITS 128
#define NR_h 80
#elif (HASH_DIGEST_INBITS==384) && (STATE_INBITS==384)
#define HASH_RATE_INBITS 48
#define HASH_SQUEEZE_RATE_INBITS 192
#define NR_h 104
#elif (HASH_DIGEST_INBITS==512) && (STATE_INBITS==512)
#define HASH_RATE_INBITS 64
#define HASH_SQUEEZE_RATE_INBITS 256
#define NR_h 140
#else
#error "Unsupported combination of hash digest size and state size"
#endif
#define HASH_RATE_INBYTES ((HASH_RATE_INBITS + 7) / 8)
#define HASH_SQUEEZE_RATE_INBYTES ((HASH_SQUEEZE_RATE_INBITS + 7) / 8)
#endif
#define TAG_MATCH 0
#define TAG_UNMATCH -1
#define OTHER_FAILURES -2
#endif
\ No newline at end of file
#ifdef __cplusplus
extern "C" {
#endif
int crypto_aead_encrypt(
unsigned char *c,unsigned long long *clen,
const unsigned char *m,unsigned long long mlen,
const unsigned char *ad,unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
);
int crypto_aead_decrypt(
unsigned char *m,unsigned long long *outputmlen,
unsigned char *nsec,
const unsigned char *c,unsigned long long clen,
const unsigned char *ad,unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
);
#ifdef __cplusplus
}
#endif
#include <avr/io.h>
#include <avr/sfr_defs.h>
#include <stdlib.h>
#include <string.h>
#include "config.h"
extern void crypto_aead_encrypt_asm(
unsigned char *c,
const unsigned char *m,
unsigned char mlen,
const unsigned char *ad,
unsigned char adlen,
const unsigned char *npub,
const unsigned char *k
);
extern int crypto_aead_decrypt_asm(
unsigned char *m,
const unsigned char *c,
unsigned char clen,
const unsigned char *ad,
unsigned char adlen,
const unsigned char *npub,
const unsigned char *k
);
extern void crypto_hash_asm(
unsigned char *out,
const unsigned char *in,
unsigned char inlen
);
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
)
{
/*
...
... the code for the cipher implementation goes here,
... generating a ciphertext c[0],c[1],...,c[*clen-1]
... from a plaintext m[0],m[1],...,m[mlen-1]
... and associated data ad[0],ad[1],...,ad[adlen-1]
... and nonce npub[0],npub[1],..
... and secret key k[0],k[1],...
... the implementation shall not use nsec
...
... return 0;
*/
(void)nsec;
crypto_aead_encrypt_asm(c, m, mlen, ad, adlen, npub, k);
*clen = mlen + TAG_INBYTES;
return 0;
}
int crypto_aead_decrypt(
unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
)
{
/*
...
... the code for the AEAD implementation goes here,
... generating a plaintext m[0],m[1],...,m[*mlen-1]
... and secret message number nsec[0],nsec[1],...
... from a ciphertext c[0],c[1],...,c[clen-1]
... and associated data ad[0],ad[1],...,ad[adlen-1]
... and nonce number npub[0],npub[1],...
... and secret key k[0],k[1],...
...
... return 0;
*/
unsigned long long mlen_;
unsigned char tag_is_match;
(void)nsec;
if (clen < CRYPTO_ABYTES) {
return -1;
}
mlen_ = clen - CRYPTO_ABYTES;
tag_is_match = crypto_aead_decrypt_asm(m, c, mlen_, ad, adlen, npub, k);
if (tag_is_match != 0)
{
memset(m, 0, (size_t)mlen_);
return -1;
}
*mlen = mlen_;
return 0;
}
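/*
 * Minimal round-trip sketch (illustrative only, not part of the original
 * file). KNOT_SELFTEST is a hypothetical guard macro introduced here so the
 * sketch is compiled out by default; it assumes the declarations above and
 * checks that decrypting a freshly produced ciphertext verifies and returns
 * the original plaintext.
 */
#ifdef KNOT_SELFTEST
static int knot_roundtrip_selftest(void)
{
    unsigned char key[CRYPTO_KEYBYTES] = {0};
    unsigned char nonce[CRYPTO_NPUBBYTES] = {0};
    unsigned char msg[8] = {0, 1, 2, 3, 4, 5, 6, 7};
    unsigned char ct[sizeof(msg) + CRYPTO_ABYTES];
    unsigned char pt[sizeof(msg)];
    unsigned long long ctlen = 0, ptlen = 0;

    crypto_aead_encrypt(ct, &ctlen, msg, sizeof(msg), NULL, 0, NULL, nonce, key);
    if (crypto_aead_decrypt(pt, &ptlen, NULL, ct, ctlen, NULL, 0, nonce, key) != 0)
        return -1;                       /* tag did not verify */
    return memcmp(pt, msg, sizeof(msg)); /* 0 on success */
}
#endif /* KNOT_SELFTEST */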
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
;
; ============================================
; S R A M D E F I N I T I O N S
; ============================================
;
#include <avr/io.h>
#include "config.h"
.section .noinit
SRAM_STATE: .BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
#if (STATE_INBYTES > 32)
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
#endif
#if (STATE_INBYTES > 48)
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
#endif
SRAM_MESSAGE_OUT_ADDR: .BYTE 0, 0
SRAM_MESSAGE_IN_ADDR: .BYTE 0, 0
SRAM_MESSAGE_IN_LEN: .BYTE 0, 0
#ifdef CRYPTO_AEAD
; For CRYPTO_AEAD
SRAM_ASSOCIATED_DATA_ADDR: .BYTE 0, 0
SRAM_ADLEN: .BYTE 0, 0
SRAM_NONCE_ADDR: .BYTE 0, 0
SRAM_KEY_ADDR: .BYTE 0, 0
SRAM_ADDITIONAL:
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
#if (CRYPTO_ABYTES > 16)
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
#endif
#if (CRYPTO_ABYTES > 24)
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
#endif
#endif
.section .text
#include "permutation.h"
; require YH:YL be the address of the current message/ciphertext block
; require ZH:ZL be the address of the output block
; for enc and dec, the resulting ciphertext/plaintext is stored through ZH:ZL
.macro XOR_to_State_ENCDEC
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
mov cnt0, rate
XOR_to_State_loop_ENCDEC:
ld tmp0, Y+ ; plaintext/ciphertext
ld tmp1, X ; state
eor tmp1, tmp0 ; ciphertext/plaintext
st Z+, tmp1 ; store ciphertext/plaintext
sbrc AEDH, 1 ; test auth/enc or dec; if AEDH[1] == 0, skip replacing the state byte
mov tmp1, tmp0 ; if dec, replace state
st X+, tmp1 ; store state byte
dec cnt0
brne XOR_to_State_loop_ENCDEC
; YH:YL now point to the next message/ciphertext block
.endm
; require YH:YL be the address of the current associated data/message block
; absorb only: the block is XORed into the state and no output is written,
; so ZH:ZL is not used by this macro
.macro XOR_to_State_AUTH
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
mov cnt0, rate
XOR_to_State_loop_AUTH:
ld tmp0, Y+ ; plaintext/ciphertext
ld tmp1, X ; state
eor tmp1, tmp0 ; ciphertext/plaintext
st X+, tmp1 ; store state byte
dec cnt0
brne XOR_to_State_loop_AUTH
; YH:YL now point to the next associated data/message block
.endm
; require YH:YL point to the input data
; require ZH:ZL point to the output data
; require cnt0 contain the number of bytes of source data
; require the number of bytes of source data to be less than the rate, i.e., 0 <= cnt0 < rate
;
; bit 0 of AEDH distinguishes (auth AD) from (enc/dec M/C):
; AEDH[0] = 0 for (auth AD), AEDH[0] = 1 for (enc/dec M/C)
; bit 1 of AEDH distinguishes (auth AD/enc M) from (dec C):
; AEDH[1] = 0 for (auth AD/enc M), AEDH[1] = 1 for (dec C)
; AEDH = 0b000 for (auth AD)
; AEDH = 0b001 for (enc M)
; AEDH = 0b011 for (dec C)
Pad_XOR_to_State:
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
tst cnt0
breq XOR_padded_data
XOR_source_data_loop:
ld tmp0, Y+ ; plaintext/ciphertext
ld tmp1, X ; state
eor tmp1, tmp0 ; ciphertext/plaintext
sbrc AEDH, 0 ; test auth or enc/dec; if AEDH[0] == 0, skip storing the result
st Z+, tmp1 ; store ciphertext/plaintext
sbrc AEDH, 1 ; test auth/enc or dec; if AEDH[1] == 0, skip replacing the state byte
mov tmp1, tmp0 ; if dec, replace state
st X+, tmp1 ; store state byte
dec cnt0
brne XOR_source_data_loop
XOR_padded_data:
ldi tmp0, PAD_BITS
ld tmp1, X
eor tmp1, tmp0
st X, tmp1
ret
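;
; Reference model (an illustrative C transliteration, not part of the build)
; of what Pad_XOR_to_State does for a partial block of cnt0 < rate bytes,
; where state, in and out stand for the X-, Y- and Z-addressed buffers:
;
;   for (i = 0; i < cnt0; i++) {
;       t = state[i] ^ in[i];
;       if (AEDH & 1) out[i] = t;            /* enc/dec: emit output byte      */
;       state[i] = (AEDH & 2) ? in[i] : t;   /* dec: ciphertext replaces state */
;   }
;   state[cnt0] ^= PAD_BITS;                 /* single-bit 10* padding         */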
AddDomain:
ldi XH, hi8(SRAM_STATE + STATE_INBYTES - 1)
ldi XL, lo8(SRAM_STATE + STATE_INBYTES - 1)
ldi tmp0, DOMAIN_BITS
ld tmp1, X
eor tmp0, tmp1
st X, tmp0
ret
; require ZH:ZL be the address of the destination
EXTRACT_from_State:
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
mov tmp1, rate
EXTRACT_from_State_loop:
ld tmp0, X+
st Z+, tmp0
dec tmp1
brne EXTRACT_from_State_loop
ret
AUTH:
tst radlen
breq AUTH_end
cp radlen, rate
brlo auth_ad_padded_block
auth_ad_loop:
XOR_to_State_AUTH
rcall Permutation
sub radlen, rate
cp radlen, rate
brlo auth_ad_padded_block
rjmp auth_ad_loop
auth_ad_padded_block:
mov cnt0, radlen
rcall Pad_XOR_to_State
rcall Permutation
AUTH_end:
ret
#ifdef CRYPTO_AEAD
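; Initialization: load the state as nonce || key (for the 192-bit-rate
; KNOT-AEAD(128,384,192) variant the remaining bytes are zeroed and the top
; bit of the last state byte is set via S384_R192_BITS), then run NR_0
; permutation rounds.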
Initialization:
ldi rn, NR_0
ldi XL, lo8(SRAM_STATE)
ldi XH, hi8(SRAM_STATE)
lds YH, SRAM_NONCE_ADDR
lds YL, SRAM_NONCE_ADDR + 1
ldi cnt0, CRYPTO_NPUBBYTES
load_nonce_loop:
ld tmp0, Y+
st X+, tmp0
dec cnt0
brne load_nonce_loop
lds YH, SRAM_KEY_ADDR
lds YL, SRAM_KEY_ADDR + 1
ldi cnt0, CRYPTO_KEYBYTES
load_key_loop:
ld tmp0, Y+
st X+, tmp0
dec cnt0
brne load_key_loop
#if (STATE_INBITS==384) && (RATE_INBITS==192)
ldi cnt0, (STATE_INBYTES - CRYPTO_NPUBBYTES - CRYPTO_KEYBYTES - 1)
clr tmp0
empty_state_loop:
st X+, tmp0
dec cnt0
brne empty_state_loop
ldi tmp0, S384_R192_BITS
st X+, tmp0
#endif
rcall Permutation
ret
ENC:
tst mclen
breq ENC_end
cp mclen, rate
brlo enc_padded_block
enc_loop:
XOR_to_State_ENCDEC
ldi rn, NR_i
rcall Permutation
sub mclen, rate
cp mclen, rate
brlo enc_padded_block
rjmp enc_loop
enc_padded_block:
mov cnt0, mclen
rcall Pad_XOR_to_State
ENC_end:
ret
Finalization:
ldi rate, SQUEEZE_RATE_INBYTES
ldi rn, NR_f
rcall Permutation
rcall EXTRACT_from_State
ret
; void crypto_aead_encrypt_asm(
; unsigned char *c,
; const unsigned char *m,
; unsigned long long mlen,
; const unsigned char *ad,
; unsigned long long adlen,
; const unsigned char *npub,
; const unsigned char *k
; )
;
; unsigned char *c, is passed in r24:r25
; const unsigned char *m, is passed in r22:r23
; unsigned long long mlen, is passed in r20:r21, only LSB (r20) is used
; const unsigned char *ad, is passed in r18:r19
; unsigned long long adlen, is passed in r16:r17, only LSB (r16) is used
; const unsigned char *npub, is passed in r14:r15
; const unsigned char *k is passed in r12:r13
.global crypto_aead_encrypt_asm
crypto_aead_encrypt_asm:
PUSH_ALL
ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
st X+, r25 ;store cipher address in SRAM_MESSAGE_OUT_ADDR
st X+, r24
st X+, r23 ;store message address in SRAM_MESSAGE_IN_ADDR
st X+, r22
st X+, r21 ;store message length in SRAM_MESSAGE_IN_LEN
st X+, r20
st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR
st X+, r18
st X+, r17 ;store associated data length in SRAM_ADLEN
st X+, r16
st X+, r15 ;store nonce address in SRAM_NONCE_ADDR
st X+, r14
st X+, r13 ;store key address in SRAM_KEY_ADDR
st X+, r12
mov radlen, r16
mov mclen, r20
rcall Initialization
ldi rn, NR_i
ldi rate, RATE_INBYTES
ldi AEDH, 0b000 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
lds YH, SRAM_ASSOCIATED_DATA_ADDR
lds YL, SRAM_ASSOCIATED_DATA_ADDR + 1
rcall AUTH
rcall AddDomain
ldi AEDH, 0b001 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
lds YH, SRAM_MESSAGE_IN_ADDR
lds YL, SRAM_MESSAGE_IN_ADDR + 1
lds ZH, SRAM_MESSAGE_OUT_ADDR
lds ZL, SRAM_MESSAGE_OUT_ADDR + 1
rcall ENC
rcall Finalization
POP_ALL
ret
; int crypto_aead_decrypt_asm(
; unsigned char *m,
; const unsigned char *c,
; unsigned long long clen,
; const unsigned char *ad,
; unsigned long long adlen,
; const unsigned char *npub,
; const unsigned char *k
; )
;
; unsigned char *m, is passed in r24:r25
; const unsigned char *c, is passed in r22:r23
; unsigned long long clen, is passed in r20:r21, only LSB (r20) is used
; const unsigned char *ad, is passed in r18:r19
; unsigned long long adlen, is passed in r16:r17, only LSB (r16) is used
; const unsigned char *npub, is passed in r14:r15
; const unsigned char *k is passed in r12:r13
.global crypto_aead_decrypt_asm
crypto_aead_decrypt_asm:
PUSH_ALL
ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
st X+, r25 ;store message address in SRAM_MESSAGE_OUT_ADDR
st X+, r24
st X+, r23 ;store cipher address in SRAM_MESSAGE_IN_ADDR
st X+, r22
st X+, r21 ;store cipher length in SRAM_MESSAGE_IN_LEN
st X+, r20
st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR
st X+, r18
st X+, r17 ;store associated data length in SRAM_ADLEN
st X+, r16
st X+, r15 ;store nonce address in SRAM_NONCE_ADDR
st X+, r14
st X+, r13 ;store key address in SRAM_KEY_ADDR
st X+, r12
mov radlen, r16
mov mclen, r20
rcall Initialization
ldi rn, NR_i
ldi rate, RATE_INBYTES
ldi AEDH, 0b000 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
lds YH, SRAM_ASSOCIATED_DATA_ADDR
lds YL, SRAM_ASSOCIATED_DATA_ADDR + 1
rcall AUTH
rcall AddDomain
ldi AEDH, 0b011 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
lds YH, SRAM_MESSAGE_IN_ADDR
lds YL, SRAM_MESSAGE_IN_ADDR + 1
lds ZH, SRAM_MESSAGE_OUT_ADDR
lds ZL, SRAM_MESSAGE_OUT_ADDR + 1
rcall ENC
ldi ZH, hi8(SRAM_ADDITIONAL)
ldi ZL, lo8(SRAM_ADDITIONAL)
rcall Finalization
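; Finalization wrote the recomputed tag to SRAM_ADDITIONAL through Z; after
; ENC, Y points just past the ciphertext, i.e. at the received tag inside c.
; Rewind Z and compare the two tags byte by byte, returning 0 in r25:r24 on
; a match and -1 (0xFFFF) otherwise.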
sbiw ZL, CRYPTO_ABYTES
ldi cnt0, CRYPTO_ABYTES
compare_tag:
ld tmp0, Z+
ld tmp1, Y+
cp tmp0, tmp1
brne return_tag_not_match
dec cnt0
brne compare_tag
rjmp return_tag_match
return_tag_not_match:
ldi r25, 0xFF
ldi r24, 0xFF
rjmp crypto_aead_decrypt_end
return_tag_match:
clr r25
clr r24
crypto_aead_decrypt_end:
POP_ALL
ret
; #ifdef CRYPTO_AEAD
#endif
#ifdef CRYPTO_HASH
; void crypto_hash_asm(
; unsigned char *out,
; const unsigned char *in,
; unsigned long long inlen
; )
;
; unsigned char *out, is passed in r24:r25
; const unsigned char *in, is passed in r22:r23
; unsigned long long inlen, is passed in r20:r21, only LSB (r20) is used
.global crypto_hash_asm
crypto_hash_asm:
PUSH_ALL
ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
st X+, r25 ;store message address in SRAM_MESSAGE_OUT_ADDR
st X+, r24
st X+, r23 ;store cipher address in SRAM_MESSAGE_IN_ADDR
st X+, r22
st X+, r21 ;store cipher length in SRAM_MESSAGE_IN_LEN
st X+, r20
mov mclen, r20
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
#if (STATE_INBITS==384) && (HASH_RATE_INBITS==128)
ldi cnt0, STATE_INBYTES - 1
#else
ldi cnt0, STATE_INBYTES
#endif
clr tmp0
zero_state:
st X+, tmp0
dec cnt0
brne zero_state
#if (STATE_INBITS==384) && (HASH_RATE_INBITS==128)
ldi tmp0, S384_R192_BITS
st X+, tmp0
#endif
ldi rn, NR_h
ldi AEDH, 0b100
HASH_ABSORBING:
mov radlen, mclen
tst radlen
breq EMPTY_M
ldi rate, HASH_RATE_INBYTES
lds YH, SRAM_MESSAGE_IN_ADDR
lds YL, SRAM_MESSAGE_IN_ADDR + 1
rcall AUTH
rjmp HASH_SQUEEZING
EMPTY_M:
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
ldi tmp0, PAD_BITS
ld tmp1, X
eor tmp1, tmp0
st X, tmp1
rcall Permutation
HASH_SQUEEZING:
ldi rate, HASH_SQUEEZE_RATE_INBYTES
lds ZH, SRAM_MESSAGE_OUT_ADDR
lds ZL, SRAM_MESSAGE_OUT_ADDR + 1
ldi tcnt, CRYPTO_BYTES
SQUEEZING_loop:
rcall EXTRACT_from_State
subi tcnt, HASH_SQUEEZE_RATE_INBYTES
breq HASH_SQUEEZING_end
rcall Permutation
rjmp SQUEEZING_loop
HASH_SQUEEZING_end:
POP_ALL
ret
#endif
; Byte order on the 8-bit AVR:
; KNOT-AEAD(128, 256, 64):
; N[ 0] AEAD_State[ 0] | Message[ 0] Perm_row_0[0] 0 Tag[ 0]
; N[ 1] AEAD_State[ 1] | Message[ 1] Perm_row_0[1] 0 Tag[ 1]
; N[ 2] AEAD_State[ 2] | Message[ 2] Perm_row_0[2] 0 Tag[ 2]
; N[ 3] AEAD_State[ 3] | Message[ 3] Perm_row_0[3] 0 Tag[ 3]
; N[ 4] AEAD_State[ 4] | Message[ 4] 0x01 Perm_row_0[4] 0 Tag[ 4]
; N[ 5] AEAD_State[ 5] | Message[ 5] 0x00 Perm_row_0[5] 0 Tag[ 5]
; N[ 6] AEAD_State[ 6] | Message[ 6] 0x00 Perm_row_0[6] 0 Tag[ 6]
; N[ 7] AEAD_State[ 7] | Message[ 7] 0x00 Perm_row_0[7] <<< 0 Tag[ 7]
; N[ 8] AEAD_State[ 8] | Perm_row_1[0] 1
; N[ 9] AEAD_State[ 9] | Perm_row_1[1] 1
; N[10] AEAD_State[10] | Perm_row_1[2] 1
; N[11] AEAD_State[11] | Perm_row_1[3] 1
; N[12] AEAD_State[12] | Perm_row_1[4] 1
; N[13] AEAD_State[13] | Perm_row_1[5] 1
; N[14] AEAD_State[14] | Perm_row_1[6] 1
; N[15] AEAD_State[15] | Perm_row_1[7] <<< 1
; K[ 0] AEAD_State[16] | Perm_row_2[0] 8
; K[ 1] AEAD_State[17] | Perm_row_2[1] 8
; K[ 2] AEAD_State[18] | Perm_row_2[2] 8
; K[ 3] AEAD_State[19] | Perm_row_2[3] 8
; K[ 4] AEAD_State[20] | Perm_row_2[4] 8
; K[ 5] AEAD_State[21] | Perm_row_2[5] 8
; K[ 6] AEAD_State[22] | Perm_row_2[6] 8
; K[ 7] AEAD_State[23] | Perm_row_2[7] <<< 8
; K[ 8] AEAD_State[24] | Perm_row_3[0] 25
; K[ 9] AEAD_State[25] | Perm_row_3[1] 25
; K[10] AEAD_State[26] | Perm_row_3[2] 25
; K[11] AEAD_State[27] | Perm_row_3[3] 25
; K[12] AEAD_State[28] | Perm_row_3[4] 25
; K[13] AEAD_State[29] | Perm_row_3[5] 25
; K[14] AEAD_State[30] | Perm_row_3[6] 25
; K[15] AEAD_State[31] | ^0x80 Perm_row_3[7] <<< 25
;
;
; KNOT-AEAD(128, 384, 192):
; Initialization
; N[ 0] AEAD_State[ 0] | Message[ 0] Perm_row_0[ 0] 0 Tag[ 0]
; N[ 1] AEAD_State[ 1] | Message[ 1] Perm_row_0[ 1] 0 Tag[ 1]
; N[ 2] AEAD_State[ 2] | Message[ 2] Perm_row_0[ 2] 0 Tag[ 2]
; N[ 3] AEAD_State[ 3] | Message[ 3] Perm_row_0[ 3] 0 Tag[ 3]
; N[ 4] AEAD_State[ 4] | Message[ 4] 0x01 Perm_row_0[ 4] 0 Tag[ 4]
; N[ 5] AEAD_State[ 5] | Message[ 5] 0x00 Perm_row_0[ 5] 0 Tag[ 5]
; N[ 6] AEAD_State[ 6] | Message[ 6] 0x00 Perm_row_0[ 6] 0 Tag[ 6]
; N[ 7] AEAD_State[ 7] | Message[ 7] 0x00 Perm_row_0[ 7] 0 Tag[ 7]
; N[ 8] AEAD_State[ 8] | Message[ 8] 0x00 Perm_row_0[ 8] 0 Tag[ 8]
; N[ 9] AEAD_State[ 9] | Message[ 9] 0x00 Perm_row_0[ 9] 0 Tag[ 9]
; N[10] AEAD_State[10] | Message[10] 0x00 Perm_row_0[10] 0 Tag[10]
; N[11] AEAD_State[11] | Message[11] 0x00 Perm_row_0[11] <<< 0 Tag[11]
; N[12] AEAD_State[12] | Message[12] 0x00 Perm_row_1[ 0] 1 Tag[12]
; N[13] AEAD_State[13] | Message[13] 0x00 Perm_row_1[ 1] 1 Tag[13]
; N[14] AEAD_State[14] | Message[14] 0x00 Perm_row_1[ 2] 1 Tag[14]
; N[15] AEAD_State[15] | Message[15] 0x00 Perm_row_1[ 3] 1 Tag[15]
; K[ 0] AEAD_State[16] | Message[16] 0x00 Perm_row_1[ 4] 1
; K[ 1] AEAD_State[17] | Message[17] 0x00 Perm_row_1[ 5] 1
; K[ 2] AEAD_State[18] | Message[18] 0x00 Perm_row_1[ 6] 1
; K[ 3] AEAD_State[19] | Message[19] 0x00 Perm_row_1[ 7] 1
; K[ 4] AEAD_State[20] | Message[20] 0x00 Perm_row_1[ 8] 1
; K[ 5] AEAD_State[21] | Message[21] 0x00 Perm_row_1[ 9] 1
; K[ 6] AEAD_State[22] | Message[22] 0x00 Perm_row_1[10] 1
; K[ 7] AEAD_State[23] | Message[23] 0x00 Perm_row_1[11] <<< 1
; K[ 8] AEAD_State[24] | Perm_row_2[ 0] 8
; K[ 9] AEAD_State[25] | Perm_row_2[ 1] 8
; K[10] AEAD_State[26] | Perm_row_2[ 2] 8
; K[11] AEAD_State[27] | Perm_row_2[ 3] 8
; K[12] AEAD_State[28] | Perm_row_2[ 4] 8
; K[13] AEAD_State[29] | Perm_row_2[ 5] 8
; K[14] AEAD_State[30] | Perm_row_2[ 6] 8
; K[15] AEAD_State[31] | Perm_row_2[ 7] 8
; 0x00 AEAD_State[32] | Perm_row_2[ 8] 8
; 0x00 AEAD_State[33] | Perm_row_2[ 9] 8
; 0x00 AEAD_State[34] | Perm_row_2[10] 8
; 0x00 AEAD_State[35] | Perm_row_2[11] <<< 8
; 0x00 AEAD_State[36] | Perm_row_3[ 0] 55
; 0x00 AEAD_State[37] | Perm_row_3[ 1] 55
; 0x00 AEAD_State[38] | Perm_row_3[ 2] 55
; 0x00 AEAD_State[39] | Perm_row_3[ 3] 55
; 0x00 AEAD_State[40] | Perm_row_3[ 4] 55
; 0x00 AEAD_State[41] | Perm_row_3[ 5] 55
; 0x00 AEAD_State[42] | Perm_row_3[ 6] 55
; 0x00 AEAD_State[43] | Perm_row_3[ 7] 55
; 0x00 AEAD_State[44] | Perm_row_3[ 8] 55
; 0x00 AEAD_State[45] | Perm_row_3[ 9] 55
; 0x00 AEAD_State[46] | Perm_row_3[10] 55
; 0x00 ^0x80 AEAD_State[47] | ^0x80 Perm_row_3[11] <<< 55
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
#define x10 r0
#define x11 r1
#define x12 r2
#define x13 r3
#define x14 r4
#define x15 r5
#define x16 r6
#define x17 r7
; an intentional arrangement of registers to facilitate movw
#define x20 r8
#define x21 r10
#define x22 r12
#define x23 r14
#define x24 r9
#define x25 r11
#define x26 r13
#define x27 r15
; an intentional arrangement of registers to facilitate movw
#define x30 r16
#define x35 r18
#define x32 r20
#define x37 r22
#define x34 r17
#define x31 r19
#define x36 r21
#define x33 r23
#define t0j r24
#define t1j r25
#define x0j r27
#include "assist.h"
.macro Sbox i0, i1, i2, i3
mov t0j, \i1
com \i0
and \i1, \i0
eor \i1, \i2
or \i2, t0j
eor \i0, \i3
eor \i2, \i0
eor t0j, \i3
and \i0, \i1
eor \i3, \i1
eor \i0, t0j
and t0j, \i2
eor \i1, t0j
.endm
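;
; Reference model of the bit-sliced KNOT S-box (an illustrative C
; transliteration of the macro above, not part of the build); a, b, c, d
; are i0..i3 and t is the scratch byte t0j:
;
;   t = b;      a = ~a;     b &= a;     b ^= c;     c |= t;
;   a ^= d;     c ^= a;     t ^= d;     a &= b;     d ^= b;
;   a ^= t;     t &= c;     b ^= t;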
Permutation:
PUSH_CONFLICT
mov rcnt, rn
ldi YH, hi8(SRAM_STATE + ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + ROW_INBYTES)
ld x10, Y+
ld x11, Y+
ld x12, Y+
ld x13, Y+
ld x14, Y+
ld x15, Y+
ld x16, Y+
ld x17, Y+
ld x20, Y+
ld x21, Y+
ld x22, Y+
ld x23, Y+
ld x24, Y+
ld x25, Y+
ld x26, Y+
ld x27, Y+
ld x30, Y+
ld x31, Y+
ld x32, Y+
ld x33, Y+
ld x34, Y+
ld x35, Y+
ld x36, Y+
ld x37, Y+
#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[2] = 1 for HASH
rjmp For_Hash
For_AEAD:
ldi ZL, lo8(RC_LFSR6)
ldi ZH, hi8(RC_LFSR6)
rjmp round_loop_start
For_Hash:
ldi ZL, lo8(RC_LFSR7)
ldi ZH, hi8(RC_LFSR7)
#elif defined(CRYPTO_AEAD)
ldi ZL, lo8(RC_LFSR6)
ldi ZH, hi8(RC_LFSR6)
#else
ldi ZL, lo8(RC_LFSR7)
ldi ZH, hi8(RC_LFSR7)
#endif
round_loop_start:
; AddRC
lpm t0j, Z+
ldi YH, hi8(SRAM_STATE)
ldi YL, lo8(SRAM_STATE)
ld x0j, Y
eor x0j, t0j
; SubColumns
Sbox x0j, x10, x20, x30
st Y+, x0j
ld x0j, Y
Sbox x0j, x11, x21, x31
st Y+, x0j
ld x0j, Y
Sbox x0j, x12, x22, x32
st Y+, x0j
ld x0j, Y
Sbox x0j, x13, x23, x33
st Y+, x0j
ld x0j, Y
Sbox x0j, x14, x24, x34
st Y+, x0j
ld x0j, Y
Sbox x0j, x15, x25, x35
st Y+, x0j
ld x0j, Y
Sbox x0j, x16, x26, x36
st Y+, x0j
ld x0j, Y
Sbox x0j, x17, x27, x37
st Y, x0j
; ShiftRows
; <<< 1
mov t0j, x17
rol t0j
rol x10
rol x11
rol x12
rol x13
rol x14
rol x15
rol x16
rol x17
; <<< 8
; 7 6 5 4 3 2 1 0 => 6 5 4 3 2 1 0 7
;mov t0j, x27
;mov x27, x26
;mov x26, x25
;mov x25, x24
;mov x24, x23
;mov x23, x22
;mov x22, x21
;mov x21, x20
;mov x20, t0j
; an intentional arrangement of registers to facilitate movw
movw t0j, x23 ; t1j:t0j <= x27:x23
movw x23, x22 ; x27:x23 <= x26:x22
movw x22, x21 ; x26:x22 <= x25:x21
movw x21, x20 ; x25:x21 <= x24:x20
mov x20, t1j ; x20 <= t1j
mov x24, t0j ; x24 <= t0j
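; net effect: with byte 0 as the least-significant byte of the row (as in
; the <<< 1 carry chain above), the 64-bit row 2 is rotated left by 8 bits,
; i.e. row2 = (row2 << 8) | (row2 >> 56), the C2 = 8 rotation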
; <<< 1
mov t0j, x37
rol t0j
rol x30
rol x31
rol x32
rol x33
rol x34
rol x35
rol x36
rol x37
; <<< 24
; 7 6 5 4 3 2 1 0 => 4 3 2 1 0 7 6 5
;mov t0j, x30
;mov x30, x35
;mov x35, x32
;mov x32, x37
;mov x37, x34
;mov x34, x31
;mov x31, x36
;mov x36, x33
;mov x33, t0j
; an intentional arrangement of registers to facilitate movw
;x30 r16
;x35 r18
;x32 r20
;x37 r22
;x34 r17
;x31 r19
;x36 r21
;x33 r23
movw t0j, x30 ; t1j:t0j <= x34:x30
movw x30, x35 ; x34:x30 <= x31:x35
movw x35, x32 ; x31:x35 <= x36:x32
movw x32, x37 ; x36:x32 <= x33:x37
mov x37, t1j ; x37 <= x34
mov x33, t0j ; x33 <= x30
dec rcnt
breq round_loop_end
jmp round_loop_start
round_loop_end:
ldi YH, hi8(SRAM_STATE + ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + ROW_INBYTES)
st Y+, x10
st Y+, x11
st Y+, x12
st Y+, x13
st Y+, x14
st Y+, x15
st Y+, x16
st Y+, x17
st Y+, x20
st Y+, x21
st Y+, x22
st Y+, x23
st Y+, x24
st Y+, x25
st Y+, x26
st Y+, x27
st Y+, x30
st Y+, x31
st Y+, x32
st Y+, x33
st Y+, x34
st Y+, x35
st Y+, x36
st Y+, x37
POP_CONFLICT
ret
.section .text
#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
RC_LFSR6:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x21, 0x03, 0x06
.byte 0x0c, 0x18, 0x31, 0x22, 0x05, 0x0a, 0x14, 0x29
.byte 0x13, 0x27, 0x0f, 0x1e, 0x3d, 0x3a, 0x34, 0x28
.byte 0x11, 0x23, 0x07, 0x0e, 0x1c, 0x39, 0x32, 0x24
.byte 0x09, 0x12, 0x25, 0x0b, 0x16, 0x2d, 0x1b, 0x37
.byte 0x2e, 0x1d, 0x3b, 0x36, 0x2c, 0x19, 0x33, 0x26
.byte 0x0d, 0x1a, 0x35, 0x2a, 0x15, 0x2b, 0x17, 0x2f
.byte 0x1f, 0x3f, 0x3e, 0x3c, 0x38, 0x30, 0x20, 0x00
RC_LFSR7:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03
.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a
.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c
.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b
.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a
.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f
.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43
.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09
.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36
.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37
.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31
.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25
.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c
.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a
.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f
.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00
#elif defined(CRYPTO_AEAD)
RC_LFSR6:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x21, 0x03, 0x06
.byte 0x0c, 0x18, 0x31, 0x22, 0x05, 0x0a, 0x14, 0x29
.byte 0x13, 0x27, 0x0f, 0x1e, 0x3d, 0x3a, 0x34, 0x28
.byte 0x11, 0x23, 0x07, 0x0e, 0x1c, 0x39, 0x32, 0x24
.byte 0x09, 0x12, 0x25, 0x0b, 0x16, 0x2d, 0x1b, 0x37
.byte 0x2e, 0x1d, 0x3b, 0x36, 0x2c, 0x19, 0x33, 0x26
.byte 0x0d, 0x1a, 0x35, 0x2a, 0x15, 0x2b, 0x17, 0x2f
.byte 0x1f, 0x3f, 0x3e, 0x3c, 0x38, 0x30, 0x20, 0x00
#else
RC_LFSR7:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03
.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a
.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c
.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b
.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a
.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f
.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43
.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09
.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36
.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37
.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31
.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25
.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c
.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a
.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f
.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00
#endif
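;
; The tables above appear to be successive states of small maximal-length
; LFSRs seeded with 0x01 (feedback taps inferred from the listed values,
; not taken from the original source; the trailing 0x00 entry is not a
; reachable LFSR state). A host-side sketch of the step functions:
;
;   /* next 6-bit constant: feedback = bit5 ^ bit4 */
;   uint8_t lfsr6_next(uint8_t s) { return ((s << 1) | (((s >> 5) ^ (s >> 4)) & 1)) & 0x3F; }
;   /* next 7-bit constant: feedback = bit6 ^ bit5 */
;   uint8_t lfsr7_next(uint8_t s) { return ((s << 1) | (((s >> 6) ^ (s >> 5)) & 1)) & 0x7F; }
;
; The 8-bit table used with the 512-bit state (RC_LFSR8 in knot512.h) seems
; to follow the same pattern with feedback = bit7 ^ bit5 ^ bit4 ^ bit3.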
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
; an intentional arrangement of registers to facilitate movw
#define x20 r0
#define x21 r2
#define x22 r4
#define x23 r6
#define x24 r8
#define x25 r10
#define x26 r1
#define x27 r3
#define x28 r5
#define x29 r7
#define x2a r9
#define x2b r11
; an intentional arrangement of registers to facilitate movw
#define x30 r22
#define x35 r20
#define x3a r18
#define x33 r16
#define x38 r14
#define x31 r12
#define x36 r23
#define x3b r21
#define x34 r19
#define x39 r17
#define x32 r15
#define x37 r13
#define t0j r24
#define t1j r25
#define x0j r25
#define x1j r27
#include "assist.h"
.macro Sbox i0, i1, i2, i3
ldi t0j, 0xFF
eor \i0, t0j
mov t0j, \i1
and \i1, \i0
eor \i1, \i2
or \i2, t0j
eor \i0, \i3
eor \i2, \i0
eor t0j, \i3
and \i0, \i1
eor \i3, \i1
eor \i0, t0j
and t0j, \i2
eor \i1, t0j
.endm
.macro OneColumn i0, i1, i2, i3
ld \i0, Y
ldd \i1, Y + ROW_INBYTES
Sbox \i0, \i1, \i2, \i3
st Y+, \i0
rol \i1 ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES -1, \i1
.endm
Permutation:
PUSH_CONFLICT
mov rcnt, rn
ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES)
ld x20, Y+
ld x21, Y+
ld x22, Y+
ld x23, Y+
ld x24, Y+
ld x25, Y+
ld x26, Y+
ld x27, Y+
ld x28, Y+
ld x29, Y+
ld x2a, Y+
ld x2b, Y+
ld x30, Y+
ld x31, Y+
ld x32, Y+
ld x33, Y+
ld x34, Y+
ld x35, Y+
ld x36, Y+
ld x37, Y+
ld x38, Y+
ld x39, Y+
ld x3a, Y+
ld x3b, Y+
ldi ZL, lo8(RC_LFSR7)
ldi ZH, hi8(RC_LFSR7)
round_loop_start:
; AddRC
lpm t0j, Z+
ldi YH, hi8(SRAM_STATE)
ldi YL, lo8(SRAM_STATE)
ld x0j, Y
eor x0j, t0j
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, x20, x30
st Y+, x0j
lsl x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES -1, x1j
OneColumn x0j, x1j, x21, x31
OneColumn x0j, x1j, x22, x32
OneColumn x0j, x1j, x23, x33
OneColumn x0j, x1j, x24, x34
OneColumn x0j, x1j, x25, x35
OneColumn x0j, x1j, x26, x36
OneColumn x0j, x1j, x27, x37
OneColumn x0j, x1j, x28, x38
OneColumn x0j, x1j, x29, x39
OneColumn x0j, x1j, x2a, x3a
OneColumn x0j, x1j, x2b, x3b
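; wrap-around for the row-1 <<< 1 rotation: the first column used lsl, so
; bit 0 of the new row-1 byte 0 is still free; the carry left by the last
; column's rol (the old MSB of the row) is added into that bit here (eor
; does not touch the carry flag, so adc still sees it)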
ld x1j, Y
eor t0j, t0j
adc x1j, t0j
st Y, x1j
; b a 9 8 7 6 5 4 3 2 1 0
; -- -- -- -- -- -- -- -- -- -- -- x- 0
; -- -- -- -- -- -- -- -- -- -- -- x' 0
; -- -- -- -- -- -- -- -- -- -- x- -- 1
; -- -- -- -- x' -- -- -- -- -- -- -- 7
; 4 3 2 1 0 b a 9 8 7 6 5
; ShiftRows -- the last two rows
; <<< 8
; b a 9 8 7 6 5 4 3 2 1 0 => a 9 8 7 6 5 4 3 2 1 0 b
movw t0j, x25 ; t1j:t0j <= x2b:x25
movw x25, x24 ; x2b:x25 <= x2a:x24
movw x24, x23 ; x2a:x24 <= x29:x23
movw x23, x22 ; x29:x23 <= x28:x22
movw x22, x21 ; x28:x22 <= x27:x21
movw x21, x20 ; x27:x21 <= x26:x20
mov x26, t0j ; x26 <= x25
mov x20, t1j ; x20 <= x2b
; >>> 1
mov t0j, x3b
ror t0j
ror x3a
ror x39
ror x38
ror x37
ror x36
ror x35
ror x34
ror x33
ror x32
ror x31
ror x30
ror x3b
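; together with the <<< 56 byte rotation below, the net effect is the
; C3 = 55 bit rotation of row 3 (56 - 1 = 55 over the 96-bit row)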
; <<< 56
; b a 9 8 7 6 5 4 3 2 1 0 => 4 3 2 1 0 b a 9 8 7 6 5
; mov x3j, x30
; mov x30, x35
; mov x35, x3a
; mov x3a, x33
; mov x33, x38
; mov x38, x31
; mov x31, x36
; mov x36, x3b
; mov x3b, x34
; mov x34, x39
; mov x39, x32
; mov x32, x37
; mov x37, x3j
; an intentional arrangement of registers to facilitate movw
; x30 r22
; x35 r20
; x3a r18
; x33 r16
; x38 r14
; x31 r12
; x36 r23
; x3b r21
; x34 r19
; x39 r17
; x32 r15
; x37 r13
movw t0j, x30 ; t1j:t0j <= x36:x30
movw x30, x35 ; x36:x30 <= x3b:x35
movw x35, x3a ; x3b:x35 <= x34:x3a
movw x3a, x33 ; x34:x3a <= x39:x33
movw x33, x38 ; x39:x33 <= x32:x38
movw x38, x31 ; x32:x38 <= x37:x31
mov x31, t1j ; x31 <= x36
mov x37, t0j ; x37 <= x30
dec rcnt
breq round_loop_end
jmp round_loop_start
round_loop_end:
ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES)
st Y+, x20
st Y+, x21
st Y+, x22
st Y+, x23
st Y+, x24
st Y+, x25
st Y+, x26
st Y+, x27
st Y+, x28
st Y+, x29
st Y+, x2a
st Y+, x2b
st Y+, x30
st Y+, x31
st Y+, x32
st Y+, x33
st Y+, x34
st Y+, x35
st Y+, x36
st Y+, x37
st Y+, x38
st Y+, x39
st Y+, x3a
st Y+, x3b
POP_CONFLICT
ret
RC_LFSR7:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03
.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a
.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c
.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b
.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a
.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f
.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43
.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09
.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36
.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37
.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31
.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25
.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c
.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a
.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f
.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
#define x20 r0
#define x22 r2
#define x24 r4
#define x26 r6
#define x28 r1
#define x2a r3
#define x2c r5
#define x2e r7
#define x30 r8
#define x3d r10
#define x3a r12
#define x37 r14
#define x34 r16
#define x31 r18
#define x3e r20
#define x3b r22
#define x38 r9
#define x35 r11
#define x32 r13
#define x3f r15
#define x3c r17
#define x39 r19
#define x36 r21
#define x33 r23
#define t0j r24
#define t1j r25
#define x0j r25
#define x1j r27
#define x2j r26
#include "assist.h"
.macro Sbox i0, i1, i2, i3
ldi t0j, 0xFF
eor \i0, t0j
mov t0j, \i1
and \i1, \i0
eor \i1, \i2
or \i2, t0j
eor \i0, \i3
eor \i2, \i0
eor t0j, \i3
and \i0, \i1
eor \i3, \i1
eor \i0, t0j
and t0j, \i2
eor \i1, t0j
.endm
.macro TwoColumns i2_e, i3_e, i3_o
; column 2i
ld x0j, Y
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, \i2_e, \i3_e
st Y+, x0j
rol x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES - 1, x1j
; column 2i+1
ld x0j, Y
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, x2j, \i3_o
st Y+, x0j
rol x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES - 1, x1j
ldd t0j, Y + 2 * ROW_INBYTES + 1
std Y + 2 * ROW_INBYTES + 1, x2j
mov x2j, t0j
.endm
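; Row 2 of the 512-bit state has 16 bytes but only its even-indexed bytes
; (x20, x22, ..., x2e) are kept in registers; the odd-indexed bytes stay in
; SRAM and are cycled through x2j. The ldd/std/mov tail of TwoColumns writes
; the just-updated odd byte back two positions ahead of where it was read,
; which is exactly its destination under the row-2 <<< 16 (two-byte)
; ShiftRows rotation, and fetches the next odd byte for the following
; column. The even-indexed bytes are rotated by the movw block at the end
; of the round.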
Permutation:
PUSH_CONFLICT
mov rcnt, rn
push rcnt
ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES)
ldd x20, Y + 0x00
ldd x22, Y + 0x02
ldd x24, Y + 0x04
ldd x26, Y + 0x06
ldd x28, Y + 0x08
ldd x2a, Y + 0x0a
ldd x2c, Y + 0x0c
ldd x2e, Y + 0x0e
adiw YL, ROW_INBYTES
ld x30, Y+
ld x31, Y+
ld x32, Y+
ld x33, Y+
ld x34, Y+
ld x35, Y+
ld x36, Y+
ld x37, Y+
ld x38, Y+
ld x39, Y+
ld x3a, Y+
ld x3b, Y+
ld x3c, Y+
ld x3d, Y+
ld x3e, Y+
ld x3f, Y+
#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[2] = 1 for HASH
rjmp For_Hash
For_AEAD:
ldi ZL, lo8(RC_LFSR7)
ldi ZH, hi8(RC_LFSR7)
rjmp round_loop_start
For_Hash:
ldi ZL, lo8(RC_LFSR8)
ldi ZH, hi8(RC_LFSR8)
#elif defined(CRYPTO_AEAD)
ldi ZL, lo8(RC_LFSR7)
ldi ZH, hi8(RC_LFSR7)
#else
ldi ZL, lo8(RC_LFSR8)
ldi ZH, hi8(RC_LFSR8)
#endif
round_loop_start:
; AddRC
lpm t0j, Z+
ldi YH, hi8(SRAM_STATE)
ldi YL, lo8(SRAM_STATE)
; column 0
ld x0j, Y
eor x0j, t0j
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, x20, x30
st Y+, x0j
lsl x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES - 1, x1j
; column 1
ld x0j, Y
ldd x1j, Y + ROW_INBYTES
ldd x2j, Y + 2 * ROW_INBYTES
Sbox x0j, x1j, x2j, x31
st Y+, x0j
rol x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES - 1, x1j
ldd t0j, Y + 2 * ROW_INBYTES + 1
std Y + 2 * ROW_INBYTES + 1, x2j
mov x2j, t0j
; column 2, 3
TwoColumns x22, x32, x33
; column 4, 5
TwoColumns x24, x34, x35
; column 6, 7
TwoColumns x26, x36, x37
; column 8, 9
TwoColumns x28, x38, x39
; column 10, 11
TwoColumns x2a, x3a, x3b
; column 12, 13
TwoColumns x2c, x3c, x3d
; column 14
ld x0j, Y
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, x2e, x3e
st Y+, x0j
rol x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES - 1, x1j
; column 15
ld x0j, Y
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, x2j, x3f
st Y+, x0j
rol x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES - 1, x1j
ld x1j, Y
eor t0j, t0j
adc x1j, t0j
st Y, x1j
std Y + ROW_INBYTES + 1, x2j
; f e d c b a 9 8 7 6 5 4 3 2 1 0
; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x- 0
; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x' 0
; -- -- -- -- -- -- -- -- -- -- -- -- -- x- -- -- 2
; -- -- -- -- -- -- -- -- -- -- -- -- x' -- -- -- 3
; c b a 9 8 7 6 5 4 3 2 1 0 f e d
; x2e x2c x2a x28 x26 x24 x22 x20 => x2c x2a x28 x26 x24 x22 x20 x2e
;mov t0j, x2e
;mov x2e, x2c
;mov x2c, x2a
;mov x2a, x28
;mov x28, x26
;mov x26, x24
;mov x24, x22
;mov x22, x20
;mov x20, t0j
; an intentional arrangement of registers to facilitate movw
movw t0j, x26 ; t1j:t0j <= x2e:x26
movw x26, x24 ; x2e:x26 <= x2c:x24
movw x24, x22 ; x2c:x24 <= x2a:x22
movw x22, x20 ; x2a:x22 <= x28:x20
mov x20, t1j ; x20 <= t1j
mov x28, t0j ; x28 <= t0j
; <<< 1
mov t0j, x3f
rol t0j
rol x30
rol x31
rol x32
rol x33
rol x34
rol x35
rol x36
rol x37
rol x38
rol x39
rol x3a
rol x3b
rol x3c
rol x3d
rol x3e
rol x3f
; <<< 24
; f e d c b a 9 8 7 6 5 4 3 2 1 0 =>
; c b a 9 8 7 6 5 4 3 2 1 0 f e d
; mov x3j, x30
; mov x30, x3d
; mov x3d, x3a
; mov x3a, x37
; mov x37, x34
; mov x34, x31
; mov x31, x3e
; mov x3e, x3b
; mov x3b, x38
; mov x38, x35
; mov x35, x32
; mov x32, x3f
; mov x3f, x3c
; mov x3c, x39
; mov x39, x36
; mov x36, x33
; mov x33, x3j
; an intentional arrangement of registers to facilitate movw
; x30 r8
; x3d r10
; x3a r12
; x37 r14
; x34 r16
; x31 r18
; x3e r20
; x3b r22
; x38 r9
; x35 r11
; x32 r13
; x3f r15
; x3c r17
; x39 r19
; x36 r21
; x33 r23
movw t0j, x30 ; t1j:t0j <= x38:x30
movw x30, x3d ; x38:x30 <= x35:x3d
movw x3d, x3a ; x35:x3d <= x32:x3a
movw x3a, x37 ; x32:x3a <= x3f:x37
movw x37, x34 ; x3f:x37 <= x3c:x34
movw x34, x31 ; x3c:x34 <= x39:x31
movw x31, x3e ; x39:x31 <= x36:x3e
movw x3e, x3b ; x36:x3e <= x33:x3b
mov x3b, t1j ; x3b <= x38
mov x33, t0j ; x33 <= x30
pop rcnt
dec rcnt
push rcnt
breq round_loop_end
rjmp round_loop_start
round_loop_end:
pop rcnt
ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES)
std Y + 0x00, x20
std Y + 0x02, x22
std Y + 0x04, x24
std Y + 0x06, x26
std Y + 0x08, x28
std Y + 0x0a, x2a
std Y + 0x0c, x2c
std Y + 0x0e, x2e
adiw YL, ROW_INBYTES
st Y+, x30
st Y+, x31
st Y+, x32
st Y+, x33
st Y+, x34
st Y+, x35
st Y+, x36
st Y+, x37
st Y+, x38
st Y+, x39
st Y+, x3a
st Y+, x3b
st Y+, x3c
st Y+, x3d
st Y+, x3e
st Y+, x3f
POP_CONFLICT
ret
.section .text
#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
RC_LFSR7:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03
.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a
.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c
.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b
.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a
.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f
.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43
.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09
.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36
.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37
.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31
.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25
.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c
.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a
.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f
.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00
RC_LFSR8:
.byte 0x01, 0x02, 0x04, 0x08, 0x11, 0x23, 0x47, 0x8e
.byte 0x1c, 0x38, 0x71, 0xe2, 0xc4, 0x89, 0x12, 0x25
.byte 0x4b, 0x97, 0x2e, 0x5c, 0xb8, 0x70, 0xe0, 0xc0
.byte 0x81, 0x03, 0x06, 0x0c, 0x19, 0x32, 0x64, 0xc9
.byte 0x92, 0x24, 0x49, 0x93, 0x26, 0x4d, 0x9b, 0x37
.byte 0x6e, 0xdc, 0xb9, 0x72, 0xe4, 0xc8, 0x90, 0x20
.byte 0x41, 0x82, 0x05, 0x0a, 0x15, 0x2b, 0x56, 0xad
.byte 0x5b, 0xb6, 0x6d, 0xda, 0xb5, 0x6b, 0xd6, 0xac
.byte 0x59, 0xb2, 0x65, 0xcb, 0x96, 0x2c, 0x58, 0xb0
.byte 0x61, 0xc3, 0x87, 0x0f, 0x1f, 0x3e, 0x7d, 0xfb
.byte 0xf6, 0xed, 0xdb, 0xb7, 0x6f, 0xde, 0xbd, 0x7a
.byte 0xf5, 0xeb, 0xd7, 0xae, 0x5d, 0xba, 0x74, 0xe8
.byte 0xd1, 0xa2, 0x44, 0x88, 0x10, 0x21, 0x43, 0x86
.byte 0x0d, 0x1b, 0x36, 0x6c, 0xd8, 0xb1, 0x63, 0xc7
.byte 0x8f, 0x1e, 0x3c, 0x79, 0xf3, 0xe7, 0xce, 0x9c
.byte 0x39, 0x73, 0xe6, 0xcc, 0x98, 0x31, 0x62, 0xc5
.byte 0x8b, 0x16, 0x2d, 0x5a, 0xb4, 0x69, 0xd2, 0xa4
.byte 0x48, 0x91, 0x22, 0x45, 0x8a, 0x14, 0x29, 0x52
.byte 0xa5, 0x4a, 0x95, 0x2a, 0x54, 0xa9, 0x53, 0xa7
.byte 0x4e, 0x9d, 0x3b, 0x77, 0xee, 0xdd, 0xbb, 0x76
.byte 0xec, 0xd9, 0xb3, 0x67, 0xcf, 0x9e, 0x3d, 0x7b
.byte 0xf7, 0xef, 0xdf, 0xbf, 0x7e, 0xfd, 0xfa, 0xf4
.byte 0xe9, 0xd3, 0xa6, 0x4c, 0x99, 0x33, 0x66, 0xcd
.byte 0x9a, 0x35, 0x6a, 0xd4, 0xa8, 0x51, 0xa3, 0x46
.byte 0x8c, 0x18, 0x30, 0x60, 0xc1, 0x83, 0x07, 0x0e
.byte 0x1d, 0x3a, 0x75, 0xea, 0xd5, 0xaa, 0x55, 0xab
.byte 0x57, 0xaf, 0x5f, 0xbe, 0x7c, 0xf9, 0xf2, 0xe5
.byte 0xca, 0x94, 0x28, 0x50, 0xa1, 0x42, 0x84, 0x09
.byte 0x13, 0x27, 0x4f, 0x9f, 0x3f, 0x7f, 0xff, 0xfe
.byte 0xfc, 0xf8, 0xf0, 0xe1, 0xc2, 0x85, 0x0b, 0x17
.byte 0x2f, 0x5e, 0xbc, 0x78, 0xf1, 0xe3, 0xc6, 0x8d
.byte 0x1a, 0x34, 0x68, 0xd0, 0xa0, 0x40, 0x80, 0x00
#elif defined(CRYPTO_AEAD)
RC_LFSR7:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03
.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a
.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c
.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b
.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a
.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f
.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43
.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09
.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36
.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37
.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31
.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25
.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c
.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a
.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f
.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00
#else
RC_LFSR8:
.byte 0x01, 0x02, 0x04, 0x08, 0x11, 0x23, 0x47, 0x8e
.byte 0x1c, 0x38, 0x71, 0xe2, 0xc4, 0x89, 0x12, 0x25
.byte 0x4b, 0x97, 0x2e, 0x5c, 0xb8, 0x70, 0xe0, 0xc0
.byte 0x81, 0x03, 0x06, 0x0c, 0x19, 0x32, 0x64, 0xc9
.byte 0x92, 0x24, 0x49, 0x93, 0x26, 0x4d, 0x9b, 0x37
.byte 0x6e, 0xdc, 0xb9, 0x72, 0xe4, 0xc8, 0x90, 0x20
.byte 0x41, 0x82, 0x05, 0x0a, 0x15, 0x2b, 0x56, 0xad
.byte 0x5b, 0xb6, 0x6d, 0xda, 0xb5, 0x6b, 0xd6, 0xac
.byte 0x59, 0xb2, 0x65, 0xcb, 0x96, 0x2c, 0x58, 0xb0
.byte 0x61, 0xc3, 0x87, 0x0f, 0x1f, 0x3e, 0x7d, 0xfb
.byte 0xf6, 0xed, 0xdb, 0xb7, 0x6f, 0xde, 0xbd, 0x7a
.byte 0xf5, 0xeb, 0xd7, 0xae, 0x5d, 0xba, 0x74, 0xe8
.byte 0xd1, 0xa2, 0x44, 0x88, 0x10, 0x21, 0x43, 0x86
.byte 0x0d, 0x1b, 0x36, 0x6c, 0xd8, 0xb1, 0x63, 0xc7
.byte 0x8f, 0x1e, 0x3c, 0x79, 0xf3, 0xe7, 0xce, 0x9c
.byte 0x39, 0x73, 0xe6, 0xcc, 0x98, 0x31, 0x62, 0xc5
.byte 0x8b, 0x16, 0x2d, 0x5a, 0xb4, 0x69, 0xd2, 0xa4
.byte 0x48, 0x91, 0x22, 0x45, 0x8a, 0x14, 0x29, 0x52
.byte 0xa5, 0x4a, 0x95, 0x2a, 0x54, 0xa9, 0x53, 0xa7
.byte 0x4e, 0x9d, 0x3b, 0x77, 0xee, 0xdd, 0xbb, 0x76
.byte 0xec, 0xd9, 0xb3, 0x67, 0xcf, 0x9e, 0x3d, 0x7b
.byte 0xf7, 0xef, 0xdf, 0xbf, 0x7e, 0xfd, 0xfa, 0xf4
.byte 0xe9, 0xd3, 0xa6, 0x4c, 0x99, 0x33, 0x66, 0xcd
.byte 0x9a, 0x35, 0x6a, 0xd4, 0xa8, 0x51, 0xa3, 0x46
.byte 0x8c, 0x18, 0x30, 0x60, 0xc1, 0x83, 0x07, 0x0e
.byte 0x1d, 0x3a, 0x75, 0xea, 0xd5, 0xaa, 0x55, 0xab
.byte 0x57, 0xaf, 0x5f, 0xbe, 0x7c, 0xf9, 0xf2, 0xe5
.byte 0xca, 0x94, 0x28, 0x50, 0xa1, 0x42, 0x84, 0x09
.byte 0x13, 0x27, 0x4f, 0x9f, 0x3f, 0x7f, 0xff, 0xfe
.byte 0xfc, 0xf8, 0xf0, 0xe1, 0xc2, 0x85, 0x0b, 0x17
.byte 0x2f, 0x5e, 0xbc, 0x78, 0xf1, 0xe3, 0xc6, 0x8d
.byte 0x1a, 0x34, 0x68, 0xd0, 0xa0, 0x40, 0x80, 0x00
#endif
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
;
; ============================================
; R E G I S T E R D E F I N I T I O N S
; ============================================
;
#define mclen r16
#define radlen r17
#define tcnt r17
#define tmp0 r20
#define tmp1 r21
#define cnt0 r22
#define rn r23
#define rate r24
; AEDH = 0b000: for authenticating AD
; AEDH = 0b001: for encryption
; AEDH = 0b011: for decryption
; AEDH = 0b100: for hash
#define AEDH r25
#define rcnt r26
#if (STATE_INBITS==256)
#include "knot256.h"
#elif (STATE_INBITS==384)
#include "knot384.h"
#elif (STATE_INBITS==512)
#include "knot512.h"
#else
#error "Unsupported state size"
#endif
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
.macro PUSH_CONFLICT
push r16
push r17
push r18
push r19
push r23
push r24
push r25
push r26
push r27
push r28
push r29
push r30
push r31
.endm
.macro POP_CONFLICT
pop r31
pop r30
pop r29
pop r28
pop r27
pop r26
pop r25
pop r24
pop r23
pop r19
pop r18
pop r17
pop r16
.endm
.macro PUSH_ALL
push r2
push r3
push r4
push r5
push r6
push r7
push r8
push r9
push r10
push r11
push r12
push r13
push r14
push r15
push r16
push r17
push r28
push r29
.endm
.macro POP_ALL
pop r29
pop r28
pop r17
pop r16
pop r15
pop r14
pop r13
pop r12
pop r11
pop r10
pop r9
pop r8
pop r7
pop r6
pop r5
pop r4
pop r3
pop r2
clr r1
.endm
\ No newline at end of file
#ifndef __CONFIG_H__
#define __CONFIG_H__
#define CRYPTO_AEAD
//#define CRYPTO_HASH
#define MAX_MESSAGE_LENGTH 128
#define STATE_INBITS 384
/* For CRYPTO_AEAD */
#define CRYPTO_KEYBITS 128
/* For CRYPTO_HASH */
#define CRYPTO_BITS 256
#define STATE_INBYTES ((STATE_INBITS + 7) / 8)
#define ROW_INBITS ((STATE_INBITS + 3) / 4)
#define ROW_INBYTES ((ROW_INBITS + 7) / 8)
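/*
 * Worked example (added for illustration, not in the original header): with
 * STATE_INBITS == 384 these formulas give STATE_INBYTES == 48, ROW_INBITS == 96
 * and ROW_INBYTES == 12, i.e. the state is four 96-bit rows.
 */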
/* For CRYPTO_AEAD */
#define CRYPTO_KEYBYTES ((CRYPTO_KEYBITS + 7) / 8)
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES CRYPTO_KEYBYTES
#define CRYPTO_ABYTES CRYPTO_KEYBYTES
#define CRYPTO_NOOVERLAP 1
#define MAX_ASSOCIATED_DATA_LENGTH 32
#define MAX_CIPHER_LENGTH (MAX_MESSAGE_LENGTH + CRYPTO_ABYTES)
#define TAG_MATCH 0
#define TAG_UNMATCH -1
#define OTHER_FAILURES -2
/* For CRYPTO_HASH */
#define CRYPTO_BYTES ((CRYPTO_BITS + 7) / 8)
#define DOMAIN_BITS 0x80
#define PAD_BITS 0x01
#define S384_R192_BITS 0x80
#if (STATE_INBITS==256)
#define C1 1
#define C2 8
#define C3 25
#elif (STATE_INBITS==384)
#define C1 1
#define C2 8
#define C3 55
#elif (STATE_INBITS==512)
#define C1 1
#define C2 16
#define C3 25
#else
#error "Unsupported state size"
#endif
#ifdef CRYPTO_AEAD
/* For CRYPTO_AEAD */
#define KEY_INBITS (CRYPTO_KEYBYTES * 8)
#define KEY_INBYTES (CRYPTO_KEYBYTES)
#define NONCE_INBITS (CRYPTO_NPUBBYTES * 8)
#define NONCE_INBYTES (CRYPTO_NPUBBYTES)
#define TAG_INBITS (CRYPTO_ABYTES * 8)
#define TAG_INBYTES (CRYPTO_ABYTES)
#if (KEY_INBITS==128) && (STATE_INBITS==256)
#define RATE_INBITS 64
#define NR_0 52
#define NR_i 28
#define NR_f 32
#elif (KEY_INBITS==128) && (STATE_INBITS==384)
#define RATE_INBITS 192
#define NR_0 76
#define NR_i 28
#define NR_f 32
#elif (KEY_INBITS==192) && (STATE_INBITS==384)
#define RATE_INBITS 96
#define NR_0 76
#define NR_i 40
#define NR_f 44
#elif (KEY_INBITS==256) && (STATE_INBITS==512)
#define RATE_INBITS 128
#define NR_0 100
#define NR_i 52
#define NR_f 56
#else
#error "Unsupported combination of key size and state size"
#endif
#define RATE_INBYTES ((RATE_INBITS + 7) / 8)
#define SQUEEZE_RATE_INBYTES TAG_INBYTES
#endif
#ifdef CRYPTO_HASH
/* For CRYPTO_HASH */
#define HASH_DIGEST_INBITS (CRYPTO_BYTES * 8)
#if (HASH_DIGEST_INBITS==256) && (STATE_INBITS==256)
#define HASH_RATE_INBITS 32
#define HASH_SQUEEZE_RATE_INBITS 128
#define NR_h 68
#elif (HASH_DIGEST_INBITS==256) && (STATE_INBITS==384)
#define HASH_RATE_INBITS 128
#define HASH_SQUEEZE_RATE_INBITS 128
#define NR_h 80
#elif (HASH_DIGEST_INBITS==384) && (STATE_INBITS==384)
#define HASH_RATE_INBITS 48
#define HASH_SQUEEZE_RATE_INBITS 192
#define NR_h 104
#elif (HASH_DIGEST_INBITS==512) && (STATE_INBITS==512)
#define HASH_RATE_INBITS 64
#define HASH_SQUEEZE_RATE_INBITS 256
#define NR_h 140
#else
#error "Unsupported combination of hash digest size and state size"
#endif
#define HASH_RATE_INBYTES ((HASH_RATE_INBITS + 7) / 8)
#define HASH_SQUEEZE_RATE_INBYTES ((HASH_SQUEEZE_RATE_INBITS + 7) / 8)
#endif
#define TAG_MATCH 0
#define TAG_UNMATCH -1
#define OTHER_FAILURES -2
#endif
\ No newline at end of file
#ifdef __cplusplus
extern "C" {
#endif
int crypto_aead_encrypt(
unsigned char *c,unsigned long long *clen,
const unsigned char *m,unsigned long long mlen,
const unsigned char *ad,unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
);
int crypto_aead_decrypt(
unsigned char *m,unsigned long long *outputmlen,
unsigned char *nsec,
const unsigned char *c,unsigned long long clen,
const unsigned char *ad,unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
);
#ifdef __cplusplus
}
#endif
#include <avr/io.h>
#include <avr/sfr_defs.h>
#include <stdlib.h>
#include <string.h>
#include "config.h"
extern void crypto_aead_encrypt_asm(
unsigned char *c,
const unsigned char *m,
unsigned char mlen,
const unsigned char *ad,
unsigned char adlen,
const unsigned char *npub,
const unsigned char *k
);
extern int crypto_aead_decrypt_asm(
unsigned char *m,
const unsigned char *c,
unsigned char clen,
const unsigned char *ad,
unsigned char adlen,
const unsigned char *npub,
const unsigned char *k
);
extern void crypto_hash_asm(
unsigned char *out,
const unsigned char *in,
unsigned char inlen
);
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
)
{
/*
...
... the code for the cipher implementation goes here,
... generating a ciphertext c[0],c[1],...,c[*clen-1]
... from a plaintext m[0],m[1],...,m[mlen-1]
... and associated data ad[0],ad[1],...,ad[adlen-1]
... and nonce npub[0],npub[1],..
... and secret key k[0],k[1],...
... the implementation shall not use nsec
...
... return 0;
*/
(void)nsec;
crypto_aead_encrypt_asm(c, m, mlen, ad, adlen, npub, k);
*clen = mlen + TAG_INBYTES;
return 0;
}
int crypto_aead_decrypt(
unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
)
{
/*
...
... the code for the AEAD implementation goes here,
... generating a plaintext m[0],m[1],...,m[*mlen-1]
... and secret message number nsec[0],nsec[1],...
... from a ciphertext c[0],c[1],...,c[clen-1]
... and associated data ad[0],ad[1],...,ad[adlen-1]
... and nonce number npub[0],npub[1],...
... and secret key k[0],k[1],...
...
... return 0;
*/
unsigned long long mlen_;
unsigned char tag_is_match;
(void)nsec;
if (clen < CRYPTO_ABYTES) {
return -1;
}
mlen_ = clen - CRYPTO_ABYTES;
tag_is_match = crypto_aead_decrypt_asm(m, c, mlen_, ad, adlen, npub, k);
if (tag_is_match != 0)
{
memset(m, 0, (size_t)mlen_);
return -1;
}
*mlen = mlen_;
return 0;
}
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
;
; ============================================
; S R A M D E F I N I T I O N S
; ============================================
;
#include <avr/io.h>
#include "config.h"
.section .noinit
SRAM_STATE: .BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
#if (STATE_INBYTES > 32)
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
#endif
#if (STATE_INBYTES > 48)
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
#endif
SRAM_MESSAGE_OUT_ADDR: .BYTE 0, 0
SRAM_MESSAGE_IN_ADDR: .BYTE 0, 0
SRAM_MESSAGE_IN_LEN: .BYTE 0, 0
#ifdef CRYPTO_AEAD
; For CRYPTO_AEAD
SRAM_ASSOCIATED_DATA_ADDR: .BYTE 0, 0
SRAM_ADLEN: .BYTE 0, 0
SRAM_NONCE_ADDR: .BYTE 0, 0
SRAM_KEY_ADDR: .BYTE 0, 0
SRAM_ADDITIONAL:
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
#if (CRYPTO_ABYTES > 16)
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
#endif
#if (CRYPTO_ABYTES > 24)
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
#endif
#endif
.section .text
#include "permutation.h"
; require YH:YL be the address of the current message/ciphertext block
; require ZH:ZL be the address of the output block
; for enc and dec, the resulting ciphertext/plaintext is stored through ZH:ZL
.macro XOR_to_State_ENCDEC
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
mov cnt0, rate
XOR_to_State_loop_ENCDEC:
ld tmp0, Y+ ; plaintext/ciphertext
ld tmp1, X ; state
eor tmp1, tmp0 ; ciphertext/plaintext
st Z+, tmp1 ; store ciphertext/plaintext
sbrc AEDH, 1 ; test auth/enc or dec; if AEDH[1] == 0, skip replacing the state byte
mov tmp1, tmp0 ; if dec, replace state
st X+, tmp1 ; store state byte
dec cnt0
brne XOR_to_State_loop_ENCDEC
; YH:YL now point to the next message/ciphertext block
.endm
; require YH:YL be the address of the current associated data/message block
; absorb only: the block is XORed into the state and no output is written,
; so ZH:ZL is not used by this macro
.macro XOR_to_State_AUTH
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
mov cnt0, rate
XOR_to_State_loop_AUTH:
ld tmp0, Y+ ; plaintext/ciphertext
ld tmp1, X ; state
eor tmp1, tmp0 ; ciphertext/plaintext
st X+, tmp1 ; store state byte
dec cnt0
brne XOR_to_State_loop_AUTH
; YH:YL now point to the next associated data/message block
.endm
; require YH:YL point to the input data
; require ZH:ZL point to the output data
; require cnt0 contain the number of bytes of source data
; require the number of bytes of source data to be less than the rate, i.e., 0 <= cnt0 < rate
;
; bit 0 of AEDH distinguishes (auth AD) from (enc/dec M/C):
; AEDH[0] = 0 for (auth AD), AEDH[0] = 1 for (enc/dec M/C)
; bit 1 of AEDH distinguishes (auth AD/enc M) from (dec C):
; AEDH[1] = 0 for (auth AD/enc M), AEDH[1] = 1 for (dec C)
; AEDH = 0b000 for (auth AD)
; AEDH = 0b001 for (enc M)
; AEDH = 0b011 for (dec C)
Pad_XOR_to_State:
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
tst cnt0
breq XOR_padded_data
XOR_source_data_loop:
ld tmp0, Y+ ; plaintext/ciphertext
ld tmp1, X ; state
eor tmp1, tmp0 ; ciphertext/plaintext
sbrc AEDH, 0 ; test auth or enc/dec; if AEDH[0] == 0, skip storing the result
st Z+, tmp1 ; store ciphertext/plaintext
sbrc AEDH, 1 ; test auth/enc or dec; if AEDH[1] == 0, skip replacing the state byte
mov tmp1, tmp0 ; if dec, replace state
st X+, tmp1 ; store state byte
dec cnt0
brne XOR_source_data_loop
XOR_padded_data:
ldi tmp0, PAD_BITS
ld tmp1, X
eor tmp1, tmp0
st X, tmp1
ret
AddDomain:
ldi XH, hi8(SRAM_STATE + STATE_INBYTES - 1)
ldi XL, lo8(SRAM_STATE + STATE_INBYTES - 1)
ldi tmp0, DOMAIN_BITS
ld tmp1, X
eor tmp0, tmp1
st X, tmp0
ret
; require ZH:ZL be the address of the destination
EXTRACT_from_State:
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
mov tmp1, rate
EXTRACT_from_State_loop:
ld tmp0, X+
st Z+, tmp0
dec tmp1
brne EXTRACT_from_State_loop
ret
AUTH:
tst radlen
breq AUTH_end
cp radlen, rate
brlo auth_ad_padded_block
auth_ad_loop:
XOR_to_State_AUTH
rcall Permutation
sub radlen, rate
cp radlen, rate
brlo auth_ad_padded_block
rjmp auth_ad_loop
auth_ad_padded_block:
mov cnt0, radlen
rcall Pad_XOR_to_State
rcall Permutation
AUTH_end:
ret
#ifdef CRYPTO_AEAD
Initialization:
ldi rn, NR_0
ldi XL, lo8(SRAM_STATE)
ldi XH, hi8(SRAM_STATE)
lds YH, SRAM_NONCE_ADDR
lds YL, SRAM_NONCE_ADDR + 1
ldi cnt0, CRYPTO_NPUBBYTES
load_nonce_loop:
ld tmp0, Y+
st X+, tmp0
dec cnt0
brne load_nonce_loop
lds YH, SRAM_KEY_ADDR
lds YL, SRAM_KEY_ADDR + 1
ldi cnt0, CRYPTO_KEYBYTES
load_key_loop:
ld tmp0, Y+
st X+, tmp0
dec cnt0
brne load_key_loop
#if (STATE_INBITS==384) && (RATE_INBITS==192)
ldi cnt0, (STATE_INBYTES - CRYPTO_NPUBBYTES - CRYPTO_KEYBYTES - 1)
clr tmp0
empty_state_loop:
st X+, tmp0
dec cnt0
brne empty_state_loop
ldi tmp0, S384_R192_BITS
st X+, tmp0
#endif
rcall Permutation
ret
ENC:
tst mclen
breq ENC_end
cp mclen, rate
brlo enc_padded_block
enc_loop:
XOR_to_State_ENCDEC
ldi rn, NR_i
rcall Permutation
sub mclen, rate
cp mclen, rate
brlo enc_padded_block
rjmp enc_loop
enc_padded_block:
mov cnt0, mclen
rcall Pad_XOR_to_State
ENC_end:
ret
Finalization:
ldi rate, SQUEEZE_RATE_INBYTES
ldi rn, NR_f
rcall Permutation
rcall EXTRACT_from_State
ret
; void crypto_aead_encrypt_asm(
; unsigned char *c,
; const unsigned char *m,
; unsigned long long mlen,
; const unsigned char *ad,
; unsigned long long adlen,
; const unsigned char *npub,
; const unsigned char *k
; )
;
; unsigned char *c, is passed in r24:r25
; const unsigned char *m, is passed in r22:r23
; unsigned long long mlen, is passed in r20:r21, only LSB (r20) is used
; const unsigned char *ad, is passed in r18:r19
; unsigned long long adlen, is passed in r16:r17, only LSB (r16) is used
; const unsigned char *npub, is passed in r14:r15
; const unsigned char *k is passed in r12:r13
.global crypto_aead_encrypt_asm
crypto_aead_encrypt_asm:
PUSH_ALL
ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
st X+, r25 ;store cipher address in SRAM_MESSAGE_OUT_ADDR
st X+, r24
st X+, r23 ;store message address in SRAM_MESSAGE_IN_ADDR
st X+, r22
st X+, r21 ;store message length in SRAM_MESSAGE_IN_LEN
st X+, r20
st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR
st X+, r18
st X+, r17 ;store associated data length in SRAM_ADLEN
st X+, r16
st X+, r15 ;store nonce address in SRAM_NONCE_ADDR
st X+, r14
st X+, r13 ;store key address in SRAM_KEY_ADDR
st X+, r12
mov radlen, r16
mov mclen, r20
rcall Initialization
ldi rn, NR_i
ldi rate, RATE_INBYTES
ldi AEDH, 0b000 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
lds YH, SRAM_ASSOCIATED_DATA_ADDR
lds YL, SRAM_ASSOCIATED_DATA_ADDR + 1
rcall AUTH
rcall AddDomain
ldi AEDH, 0b001 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
lds YH, SRAM_MESSAGE_IN_ADDR
lds YL, SRAM_MESSAGE_IN_ADDR + 1
lds ZH, SRAM_MESSAGE_OUT_ADDR
lds ZL, SRAM_MESSAGE_OUT_ADDR + 1
rcall ENC
rcall Finalization
POP_ALL
ret
; int crypto_aead_decrypt_asm(
; unsigned char *m,
; const unsigned char *c,
; unsigned long long clen,
; const unsigned char *ad,
; unsigned long long adlen,
; const unsigned char *npub,
; const unsigned char *k
; )
;
; unsigned char *m, is passed in r24:r25
; const unsigned char *c, is passed in r22:r23
; unsigned long long clen, is passed in r20:r21, only LSB (r20) is used
; const unsigned char *ad, is passed in r18:r19
; unsigned long long adlen, is passed in r16:r17, only LSB (r16) is used
; const unsigned char *npub, is passed in r14:r15
; const unsigned char *k is passed in r12:r13
.global crypto_aead_decrypt_asm
crypto_aead_decrypt_asm:
PUSH_ALL
ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
st X+, r25 ;store message address in SRAM_MESSAGE_OUT_ADDR
st X+, r24
st X+, r23 ;store cipher address in SRAM_MESSAGE_IN_ADDR
st X+, r22
st X+, r21 ;store cipher length in SRAM_MESSAGE_IN_LEN
st X+, r20
st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR
st X+, r18
st X+, r17 ;store associated data length in SRAM_ADLEN
st X+, r16
st X+, r15 ;store nonce address in SRAM_NONCE_ADDR
st X+, r14
st X+, r13 ;store key address in SRAM_KEY_ADDR
st X+, r12
mov radlen, r16
mov mclen, r20
rcall Initialization
ldi rn, NR_i
ldi rate, RATE_INBYTES
ldi AEDH, 0b000 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
lds YH, SRAM_ASSOCIATED_DATA_ADDR
lds YL, SRAM_ASSOCIATED_DATA_ADDR + 1
rcall AUTH
rcall AddDomain
ldi AEDH, 0b011 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
lds YH, SRAM_MESSAGE_IN_ADDR
lds YL, SRAM_MESSAGE_IN_ADDR + 1
lds ZH, SRAM_MESSAGE_OUT_ADDR
lds ZL, SRAM_MESSAGE_OUT_ADDR + 1
rcall ENC
ldi ZH, hi8(SRAM_ADDITIONAL)
ldi ZL, lo8(SRAM_ADDITIONAL)
rcall Finalization
sbiw ZL, CRYPTO_ABYTES
ldi cnt0, CRYPTO_ABYTES
compare_tag:
ld tmp0, Z+
ld tmp1, Y+
cp tmp0, tmp1
brne return_tag_not_match
dec cnt0
brne compare_tag
rjmp return_tag_match
return_tag_not_match:
ldi r25, 0xFF
ldi r24, 0xFF
rjmp crypto_aead_decrypt_end
return_tag_match:
clr r25
clr r24
crypto_aead_decrypt_end:
POP_ALL
ret
; #ifdef CRYPTO_AEAD
#endif
#ifdef CRYPTO_HASH
; void crypto_hash_asm(
; unsigned char *out,
; const unsigned char *in,
; unsigned long long inlen
; )
;
; unsigned char *out, is passed in r24:r25
; const unsigned char *in, is passed in r22:r23
; unsigned long long inlen, is passed in r20:r21, only LSB (r20) is used
.global crypto_hash_asm
crypto_hash_asm:
PUSH_ALL
ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
st X+, r25 ;store message address in SRAM_MESSAGE_OUT_ADDR
st X+, r24
st X+, r23 ;store cipher address in SRAM_MESSAGE_IN_ADDR
st X+, r22
st X+, r21 ;store cipher length in SRAM_MESSAGE_IN_LEN
st X+, r20
mov mclen, r20
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
#if (STATE_INBITS==384) && (HASH_RATE_INBITS==128)
ldi cnt0, STATE_INBYTES - 1
#else
ldi cnt0, STATE_INBYTES
#endif
clr tmp0
zero_state:
st X+, tmp0
dec cnt0
brne zero_state
#if (STATE_INBITS==384) && (HASH_RATE_INBITS==128)
ldi tmp0, S384_R192_BITS
st X+, tmp0
#endif
ldi rn, NR_h
ldi AEDH, 0b100
HASH_ABSORBING:
mov radlen, mclen
tst radlen
breq EMPTY_M
ldi rate, HASH_RATE_INBYTES
lds YH, SRAM_MESSAGE_IN_ADDR
lds YL, SRAM_MESSAGE_IN_ADDR + 1
rcall AUTH
rjmp HASH_SQUEEZING
EMPTY_M:
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
ldi tmp0, PAD_BITS
ld tmp1, X
eor tmp1, tmp0
st X, tmp1
rcall Permutation
HASH_SQUEEZING:
ldi rate, HASH_SQUEEZE_RATE_INBYTES
lds ZH, SRAM_MESSAGE_OUT_ADDR
lds ZL, SRAM_MESSAGE_OUT_ADDR + 1
ldi tcnt, CRYPTO_BYTES
SQUEEZING_loop:
rcall EXTRACT_from_State
subi tcnt, HASH_SQUEEZE_RATE_INBYTES
breq HASH_SQUEEZING_end
rcall Permutation
rjmp SQUEEZING_loop
HASH_SQUEEZING_end:
POP_ALL
ret
#endif
; Byte Order In AVR 8:
; KNOT-AEAD(128, 256, 64):
; N[ 0] AEAD_State[ 0] | Message[ 0] Perm_row_0[0] 0 Tag[ 0]
; N[ 1] AEAD_State[ 1] | Message[ 1] Perm_row_0[1] 0 Tag[ 1]
; N[ 2] AEAD_State[ 2] | Message[ 2] Perm_row_0[2] 0 Tag[ 2]
; N[ 3] AEAD_State[ 3] | Message[ 3] Perm_row_0[3] 0 Tag[ 3]
; N[ 4] AEAD_State[ 4] | Message[ 4] 0x01 Perm_row_0[4] 0 Tag[ 4]
; N[ 5] AEAD_State[ 5] | Message[ 5] 0x00 Perm_row_0[5] 0 Tag[ 5]
; N[ 6] AEAD_State[ 6] | Message[ 6] 0x00 Perm_row_0[6] 0 Tag[ 6]
; N[ 7] AEAD_State[ 7] | Message[ 7] 0x00 Perm_row_0[7] <<< 0 Tag[ 7]
; N[ 8] AEAD_State[ 8] | Perm_row_1[0] 1
; N[ 9] AEAD_State[ 9] | Perm_row_1[1] 1
; N[10] AEAD_State[10] | Perm_row_1[2] 1
; N[11] AEAD_State[11] | Perm_row_1[3] 1
; N[12] AEAD_State[12] | Perm_row_1[4] 1
; N[13] AEAD_State[13] | Perm_row_1[5] 1
; N[14] AEAD_State[14] | Perm_row_1[6] 1
; N[15] AEAD_State[15] | Perm_row_1[7] <<< 1
; K[ 0] AEAD_State[16] | Perm_row_2[0] 8
; K[ 1] AEAD_State[17] | Perm_row_2[1] 8
; K[ 2] AEAD_State[18] | Perm_row_2[2] 8
; K[ 3] AEAD_State[19] | Perm_row_2[3] 8
; K[ 4] AEAD_State[20] | Perm_row_2[4] 8
; K[ 5] AEAD_State[21] | Perm_row_2[5] 8
; K[ 6] AEAD_State[22] | Perm_row_2[6] 8
; K[ 7] AEAD_State[23] | Perm_row_2[7] <<< 8
; K[ 8] AEAD_State[24] | Perm_row_3[0] 25
; K[ 9] AEAD_State[25] | Perm_row_3[1] 25
; K[10] AEAD_State[26] | Perm_row_3[2] 25
; K[11] AEAD_State[27] | Perm_row_3[3] 25
; K[12] AEAD_State[28] | Perm_row_3[4] 25
; K[13] AEAD_State[29] | Perm_row_3[5] 25
; K[14] AEAD_State[30] | Perm_row_3[6] 25
; K[15] AEAD_State[31] | ^0x80 Perm_row_3[7] <<< 25
;
;
; KNOT-AEAD(128, 384, 192):
; Initialization
; N[ 0] AEAD_State[ 0] | Message[ 0] Perm_row_0[ 0] 0 Tag[ 0]
; N[ 1] AEAD_State[ 1] | Message[ 1] Perm_row_0[ 1] 0 Tag[ 1]
; N[ 2] AEAD_State[ 2] | Message[ 2] Perm_row_0[ 2] 0 Tag[ 2]
; N[ 3] AEAD_State[ 3] | Message[ 3] Perm_row_0[ 3] 0 Tag[ 3]
; N[ 4] AEAD_State[ 4] | Message[ 4] 0x01 Perm_row_0[ 4] 0 Tag[ 4]
; N[ 5] AEAD_State[ 5] | Message[ 5] 0x00 Perm_row_0[ 5] 0 Tag[ 5]
; N[ 6] AEAD_State[ 6] | Message[ 6] 0x00 Perm_row_0[ 6] 0 Tag[ 6]
; N[ 7] AEAD_State[ 7] | Message[ 7] 0x00 Perm_row_0[ 7] 0 Tag[ 7]
; N[ 8] AEAD_State[ 8] | Message[ 8] 0x00 Perm_row_0[ 8] 0 Tag[ 8]
; N[ 9] AEAD_State[ 9] | Message[ 9] 0x00 Perm_row_0[ 9] 0 Tag[ 9]
; N[10] AEAD_State[10] | Message[10] 0x00 Perm_row_0[10] 0 Tag[10]
; N[11] AEAD_State[11] | Message[11] 0x00 Perm_row_0[11] <<< 0 Tag[11]
; N[12] AEAD_State[12] | Message[12] 0x00 Perm_row_1[ 0] 1 Tag[12]
; N[13] AEAD_State[13] | Message[13] 0x00 Perm_row_1[ 1] 1 Tag[13]
; N[14] AEAD_State[14] | Message[14] 0x00 Perm_row_1[ 2] 1 Tag[14]
; N[15] AEAD_State[15] | Message[15] 0x00 Perm_row_1[ 3] 1 Tag[15]
; K[ 0] AEAD_State[16] | Message[16] 0x00 Perm_row_1[ 4] 1
; K[ 1] AEAD_State[17] | Message[17] 0x00 Perm_row_1[ 5] 1
; K[ 2] AEAD_State[18] | Message[18] 0x00 Perm_row_1[ 6] 1
; K[ 3] AEAD_State[19] | Message[19] 0x00 Perm_row_1[ 7] 1
; K[ 4] AEAD_State[20] | Message[20] 0x00 Perm_row_1[ 8] 1
; K[ 5] AEAD_State[21] | Message[21] 0x00 Perm_row_1[ 9] 1
; K[ 6] AEAD_State[22] | Message[22] 0x00 Perm_row_1[10] 1
; K[ 7] AEAD_State[23] | Message[23] 0x00 Perm_row_1[11] <<< 1
; K[ 8] AEAD_State[24] | Perm_row_2[ 0] 8
; K[ 9] AEAD_State[25] | Perm_row_2[ 1] 8
; K[10] AEAD_State[26] | Perm_row_2[ 2] 8
; K[11] AEAD_State[27] | Perm_row_2[ 3] 8
; K[12] AEAD_State[28] | Perm_row_2[ 4] 8
; K[13] AEAD_State[29] | Perm_row_2[ 5] 8
; K[14] AEAD_State[30] | Perm_row_2[ 6] 8
; K[15] AEAD_State[31] | Perm_row_2[ 7] 8
; 0x00 AEAD_State[32] | Perm_row_2[ 8] 8
; 0x00 AEAD_State[33] | Perm_row_2[ 9] 8
; 0x00 AEAD_State[34] | Perm_row_2[10] 8
; 0x00 AEAD_State[35] | Perm_row_2[11] <<< 8
; 0x00 AEAD_State[36] | Perm_row_3[ 0] 55
; 0x00 AEAD_State[37] | Perm_row_3[ 1] 55
; 0x00 AEAD_State[38] | Perm_row_3[ 2] 55
; 0x00 AEAD_State[39] | Perm_row_3[ 3] 55
; 0x00 AEAD_State[40] | Perm_row_3[ 4] 55
; 0x00 AEAD_State[41] | Perm_row_3[ 5] 55
; 0x00 AEAD_State[42] | Perm_row_3[ 6] 55
; 0x00 AEAD_State[43] | Perm_row_3[ 7] 55
; 0x00 AEAD_State[44] | Perm_row_3[ 8] 55
; 0x00 AEAD_State[45] | Perm_row_3[ 9] 55
; 0x00 AEAD_State[46] | Perm_row_3[10] 55
; 0x00 ^0x80 AEAD_State[47] | ^0x80 Perm_row_3[11] <<< 55
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
#define x10 r0
#define x11 r1
#define x12 r2
#define x13 r3
#define x14 r4
#define x15 r5
#define x16 r6
#define x17 r7
; an intentional arrangement of registers to facilitate movw
#define x20 r8
#define x21 r10
#define x22 r12
#define x23 r14
#define x24 r9
#define x25 r11
#define x26 r13
#define x27 r15
; an intentional arrangement of registers to facilitate movw
#define x30 r16
#define x35 r18
#define x32 r20
#define x37 r22
#define x34 r17
#define x31 r19
#define x36 r21
#define x33 r23
#define t0j r24
#define t1j r25
#define x0j r27
#include "assist.h"
.macro Sbox i0, i1, i2, i3
mov t0j, \i1
com \i0
and \i1, \i0
eor \i1, \i2
or \i2, t0j
eor \i0, \i3
eor \i2, \i0
eor t0j, \i3
and \i0, \i1
eor \i3, \i1
eor \i0, t0j
and t0j, \i2
eor \i1, t0j
.endm
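;
; For reference, a direct C transcription of the Sbox macro above (each argument
; is one byte of a bit-sliced row; t is the t0j temporary). Illustrative sketch
; only, not code used by this implementation.
;
; static void knot_sbox(uint8_t *a, uint8_t *b, uint8_t *c, uint8_t *d)
; {
;     uint8_t t = *b;          /* mov t0j, i1 */
;     *a = (uint8_t)~*a;       /* com i0      */
;     *b &= *a;                /* and i1, i0  */
;     *b ^= *c;                /* eor i1, i2  */
;     *c |= t;                 /* or  i2, t0j */
;     *a ^= *d;                /* eor i0, i3  */
;     *c ^= *a;                /* eor i2, i0  */
;     t  ^= *d;                /* eor t0j, i3 */
;     *a &= *b;                /* and i0, i1  */
;     *d ^= *b;                /* eor i3, i1  */
;     *a ^= t;                 /* eor i0, t0j */
;     t  &= *c;                /* and t0j, i2 */
;     *b ^= t;                 /* eor i1, t0j */
; }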
Permutation:
PUSH_CONFLICT
mov rcnt, rn
ldi YH, hi8(SRAM_STATE + ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + ROW_INBYTES)
ld x10, Y+
ld x11, Y+
ld x12, Y+
ld x13, Y+
ld x14, Y+
ld x15, Y+
ld x16, Y+
ld x17, Y+
ld x20, Y+
ld x21, Y+
ld x22, Y+
ld x23, Y+
ld x24, Y+
ld x25, Y+
ld x26, Y+
ld x27, Y+
ld x30, Y+
ld x31, Y+
ld x32, Y+
ld x33, Y+
ld x34, Y+
ld x35, Y+
ld x36, Y+
ld x37, Y+
#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[2] = 1 for HASH
rjmp For_Hash
For_AEAD:
ldi ZL, lo8(RC_LFSR6)
ldi ZH, hi8(RC_LFSR6)
rjmp round_loop_start
For_Hash:
ldi ZL, lo8(RC_LFSR7)
ldi ZH, hi8(RC_LFSR7)
#elif defined(CRYPTO_AEAD)
ldi ZL, lo8(RC_LFSR6)
ldi ZH, hi8(RC_LFSR6)
#else
ldi ZL, lo8(RC_LFSR7)
ldi ZH, hi8(RC_LFSR7)
#endif
round_loop_start:
; AddRC
lpm t0j, Z+
ldi YH, hi8(SRAM_STATE)
ldi YL, lo8(SRAM_STATE)
ld x0j, Y
eor x0j, t0j
; SubColumns
Sbox x0j, x10, x20, x30
st Y+, x0j
ld x0j, Y
Sbox x0j, x11, x21, x31
st Y+, x0j
ld x0j, Y
Sbox x0j, x12, x22, x32
st Y+, x0j
ld x0j, Y
Sbox x0j, x13, x23, x33
st Y+, x0j
ld x0j, Y
Sbox x0j, x14, x24, x34
st Y+, x0j
ld x0j, Y
Sbox x0j, x15, x25, x35
st Y+, x0j
ld x0j, Y
Sbox x0j, x16, x26, x36
st Y+, x0j
ld x0j, Y
Sbox x0j, x17, x27, x37
st Y, x0j
; ShiftRows
; <<< 1
mov t0j, x17
rol t0j
rol x10
rol x11
rol x12
rol x13
rol x14
rol x15
rol x16
rol x17
; <<< 8
; 7 6 5 4 3 2 1 0 => 6 5 4 3 2 1 0 7
;mov t0j, x27
;mov x27, x26
;mov x26, x25
;mov x25, x24
;mov x24, x23
;mov x23, x22
;mov x22, x21
;mov x21, x20
;mov x20, t0j
; an intentional arrangement of registers to facilitate movw
movw t0j, x23 ; t1j:t0j <= x27:x23
movw x23, x22 ; x27:x23 <= x26:x22
movw x22, x21 ; x26:x22 <= x25:x21
movw x21, x20 ; x25:x21 <= x24:x20
mov x20, t1j ; x20 <= t1j
mov x24, t0j ; x24 <= t0j
; <<< 1
mov t0j, x37
rol t0j
rol x30
rol x31
rol x32
rol x33
rol x34
rol x35
rol x36
rol x37
; <<< 24
; 7 6 5 4 3 2 1 0 => 4 3 2 1 0 7 6 5
;mov t0j, x30
;mov x30, x35
;mov x35, x32
;mov x32, x37
;mov x37, x34
;mov x34, x31
;mov x31, x36
;mov x36, x33
;mov x33, t0j
; an intentional arrangement of registers to facilitate movw
;x30 r16
;x35 r18
;x32 r20
;x37 r22
;x34 r17
;x31 r19
;x36 r21
;x33 r23
movw t0j, x30 ; t1j:t0j <= x34:x30
movw x30, x35 ; x34:x30 <= x31:x35
movw x35, x32 ; x31:x35 <= x36:x32
movw x32, x37 ; x36:x32 <= x33:x37
mov x37, t1j ; x37 <= x34
mov x33, t0j ; x33 <= x30
dec rcnt
breq round_loop_end
jmp round_loop_start
round_loop_end:
ldi YH, hi8(SRAM_STATE + ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + ROW_INBYTES)
st Y+, x10
st Y+, x11
st Y+, x12
st Y+, x13
st Y+, x14
st Y+, x15
st Y+, x16
st Y+, x17
st Y+, x20
st Y+, x21
st Y+, x22
st Y+, x23
st Y+, x24
st Y+, x25
st Y+, x26
st Y+, x27
st Y+, x30
st Y+, x31
st Y+, x32
st Y+, x33
st Y+, x34
st Y+, x35
st Y+, x36
st Y+, x37
POP_CONFLICT
ret
.section .text
#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
RC_LFSR6:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x21, 0x03, 0x06
.byte 0x0c, 0x18, 0x31, 0x22, 0x05, 0x0a, 0x14, 0x29
.byte 0x13, 0x27, 0x0f, 0x1e, 0x3d, 0x3a, 0x34, 0x28
.byte 0x11, 0x23, 0x07, 0x0e, 0x1c, 0x39, 0x32, 0x24
.byte 0x09, 0x12, 0x25, 0x0b, 0x16, 0x2d, 0x1b, 0x37
.byte 0x2e, 0x1d, 0x3b, 0x36, 0x2c, 0x19, 0x33, 0x26
.byte 0x0d, 0x1a, 0x35, 0x2a, 0x15, 0x2b, 0x17, 0x2f
.byte 0x1f, 0x3f, 0x3e, 0x3c, 0x38, 0x30, 0x20, 0x00
RC_LFSR7:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03
.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a
.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c
.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b
.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a
.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f
.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43
.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09
.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36
.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37
.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31
.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25
.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c
.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a
.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f
.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00
#elif defined(CRYPTO_AEAD)
RC_LFSR6:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x21, 0x03, 0x06
.byte 0x0c, 0x18, 0x31, 0x22, 0x05, 0x0a, 0x14, 0x29
.byte 0x13, 0x27, 0x0f, 0x1e, 0x3d, 0x3a, 0x34, 0x28
.byte 0x11, 0x23, 0x07, 0x0e, 0x1c, 0x39, 0x32, 0x24
.byte 0x09, 0x12, 0x25, 0x0b, 0x16, 0x2d, 0x1b, 0x37
.byte 0x2e, 0x1d, 0x3b, 0x36, 0x2c, 0x19, 0x33, 0x26
.byte 0x0d, 0x1a, 0x35, 0x2a, 0x15, 0x2b, 0x17, 0x2f
.byte 0x1f, 0x3f, 0x3e, 0x3c, 0x38, 0x30, 0x20, 0x00
#else
RC_LFSR7:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03
.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a
.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c
.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b
.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a
.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f
.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43
.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09
.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36
.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37
.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31
.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25
.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c
.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a
.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f
.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00
#endif
\ No newline at end of file
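/*
 * Illustrative sketch (not part of this code base): the RC_LFSR6 and RC_LFSR7
 * tables above appear to be the successive states of a 6-bit LFSR with feedback
 * bit5 ^ bit4 and of a 7-bit LFSR with feedback bit6 ^ bit5, both seeded with
 * 0x01; the trailing 0x00 entry looks like a pad byte. A small generator:
 */
#include <stdint.h>
#include <stdio.h>

static void print_lfsr(unsigned width, unsigned tap_a, unsigned tap_b, unsigned count)
{
    uint8_t mask = (uint8_t)((1u << width) - 1u);
    uint8_t s = 0x01;
    for (unsigned i = 0; i < count; i++) {           /* count = 2^width - 1 states */
        printf("0x%02x%s", s, ((i + 1) % 8) ? ", " : "\n");
        uint8_t fb = (uint8_t)(((s >> tap_a) ^ (s >> tap_b)) & 1u);
        s = (uint8_t)(((s << 1) | fb) & mask);
    }
    printf("0x00\n");                                /* trailing pad entry as in the tables */
}

int main(void)
{
    print_lfsr(6, 5, 4, 63);    /* should reproduce the RC_LFSR6 values */
    print_lfsr(7, 6, 5, 127);   /* should reproduce the RC_LFSR7 values */
    return 0;
}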
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
; an intentional arrangement of registers to facilitate movw
#define x20 r0
#define x21 r2
#define x22 r4
#define x23 r6
#define x24 r8
#define x25 r10
#define x26 r1
#define x27 r3
#define x28 r5
#define x29 r7
#define x2a r9
#define x2b r11
; an intentional arrangement of registers to facilitate movw
#define x30 r22
#define x35 r20
#define x3a r18
#define x33 r16
#define x38 r14
#define x31 r12
#define x36 r23
#define x3b r21
#define x34 r19
#define x39 r17
#define x32 r15
#define x37 r13
#define t0j r24
#define t1j r25
#define x0j r25
#define x1j r27
#include "assist.h"
.macro Sbox i0, i1, i2, i3
ldi t0j, 0xFF
eor \i0, t0j
mov t0j, \i1
and \i1, \i0
eor \i1, \i2
or \i2, t0j
eor \i0, \i3
eor \i2, \i0
eor t0j, \i3
and \i0, \i1
eor \i3, \i1
eor \i0, t0j
and t0j, \i2
eor \i1, t0j
.endm
.macro OneColumn i0, i1, i2, i3
ld \i0, Y
ldd \i1, Y + ROW_INBYTES
Sbox \i0, \i1, \i2, \i3
st Y+, \i0
rol \i1 ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES -1, \i1
.endm
Permutation:
PUSH_CONFLICT
mov rcnt, rn
ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES)
ld x20, Y+
ld x21, Y+
ld x22, Y+
ld x23, Y+
ld x24, Y+
ld x25, Y+
ld x26, Y+
ld x27, Y+
ld x28, Y+
ld x29, Y+
ld x2a, Y+
ld x2b, Y+
ld x30, Y+
ld x31, Y+
ld x32, Y+
ld x33, Y+
ld x34, Y+
ld x35, Y+
ld x36, Y+
ld x37, Y+
ld x38, Y+
ld x39, Y+
ld x3a, Y+
ld x3b, Y+
ldi ZL, lo8(RC_LFSR7)
ldi ZH, hi8(RC_LFSR7)
round_loop_start:
; AddRC
lpm t0j, Z+
ldi YH, hi8(SRAM_STATE)
ldi YL, lo8(SRAM_STATE)
ld x0j, Y
eor x0j, t0j
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, x20, x30
st Y+, x0j
lsl x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES -1, x1j
OneColumn x0j, x1j, x21, x31
OneColumn x0j, x1j, x22, x32
OneColumn x0j, x1j, x23, x33
OneColumn x0j, x1j, x24, x34
OneColumn x0j, x1j, x25, x35
OneColumn x0j, x1j, x26, x36
OneColumn x0j, x1j, x27, x37
OneColumn x0j, x1j, x28, x38
OneColumn x0j, x1j, x29, x39
OneColumn x0j, x1j, x2a, x3a
OneColumn x0j, x1j, x2b, x3b
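; fold the final rotation carry into row 1: after the last OneColumn, Y points
; back at row-1 byte 0 (whose bit 0 was left clear by the initial lsl) and the
; carry flag holds the bit shifted out of the last row-1 byte; eor clears t0j
; without touching carry, so the adc completes the <<< 1 rotation of row 1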
ld x1j, Y
eor t0j, t0j
adc x1j, t0j
st Y, x1j
; b a 9 8 7 6 5 4 3 2 1 0
; -- -- -- -- -- -- -- -- -- -- -- x- 0
; -- -- -- -- -- -- -- -- -- -- -- x' 0
; -- -- -- -- -- -- -- -- -- -- x- -- 1
; -- -- -- -- x' -- -- -- -- -- -- -- 7
; 4 3 2 1 0 b a 9 8 7 6 5
; ShiftRows -- the last two rows
; <<< 8
; b a 9 8 7 6 5 4 3 2 1 0 => a 9 8 7 6 5 4 3 2 1 0 b
movw t0j, x25 ; t1j:t0j <= x2b:x25
movw x25, x24 ; x2b:x25 <= x2a:x24
movw x24, x23 ; x2a:x24 <= x29:x23
movw x23, x22 ; x29:x23 <= x28:x22
movw x22, x21 ; x28:x22 <= x27:x21
movw x21, x20 ; x27:x21 <= x26:x20
mov x26, t0j ; x26 <= x25
mov x20, t1j ; x20 <= x2b
; >>> 1
mov t0j, x3b
ror t0j
ror x3a
ror x39
ror x38
ror x37
ror x36
ror x35
ror x34
ror x33
ror x32
ror x31
ror x30
ror x3b
; <<< 56
; b a 9 8 7 6 5 4 3 2 1 0 => 4 3 2 1 0 b a 9 8 7 6 5
; mov x3j, x30
; mov x30, x35
; mov x35, x3a
; mov x3a, x33
; mov x33, x38
; mov x38, x31
; mov x31, x36
; mov x36, x3b
; mov x3b, x34
; mov x34, x39
; mov x39, x32
; mov x32, x37
; mov x37, x3j
; an intentional arrangement of registers to facilitate movw
; x30 r22
; x35 r20
; x3a r18
; x33 r16
; x38 r14
; x31 r12
; x36 r23
; x3b r21
; x34 r19
; x39 r17
; x32 r15
; x37 r13
movw t0j, x30 ; t1j:t0j <= x36:x30
movw x30, x35 ; x36:x30 <= x3b:x35
movw x35, x3a ; x3b:x35 <= x34:x3a
movw x3a, x33 ; x34:x3a <= x39:x33
movw x33, x38 ; x39:x33 <= x32:x38
movw x38, x31 ; x32:x38 <= x37:x31
mov x31, t1j ; x31 <= x36
mov x37, t0j ; x37 <= x30
dec rcnt
breq round_loop_end
jmp round_loop_start
round_loop_end:
ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES)
st Y+, x20
st Y+, x21
st Y+, x22
st Y+, x23
st Y+, x24
st Y+, x25
st Y+, x26
st Y+, x27
st Y+, x28
st Y+, x29
st Y+, x2a
st Y+, x2b
st Y+, x30
st Y+, x31
st Y+, x32
st Y+, x33
st Y+, x34
st Y+, x35
st Y+, x36
st Y+, x37
st Y+, x38
st Y+, x39
st Y+, x3a
st Y+, x3b
POP_CONFLICT
ret
RC_LFSR7:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03
.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a
.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c
.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b
.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a
.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f
.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43
.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09
.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36
.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37
.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31
.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25
.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c
.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a
.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f
.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
#define x20 r0
#define x22 r2
#define x24 r4
#define x26 r6
#define x28 r1
#define x2a r3
#define x2c r5
#define x2e r7
#define x30 r8
#define x3d r10
#define x3a r12
#define x37 r14
#define x34 r16
#define x31 r18
#define x3e r20
#define x3b r22
#define x38 r9
#define x35 r11
#define x32 r13
#define x3f r15
#define x3c r17
#define x39 r19
#define x36 r21
#define x33 r23
#define t0j r24
#define t1j r25
#define x0j r25
#define x1j r27
#define x2j r26
#include "assist.h"
.macro Sbox i0, i1, i2, i3
ldi t0j, 0xFF
eor \i0, t0j
mov t0j, \i1
and \i1, \i0
eor \i1, \i2
or \i2, t0j
eor \i0, \i3
eor \i2, \i0
eor t0j, \i3
and \i0, \i1
eor \i3, \i1
eor \i0, t0j
and t0j, \i2
eor \i1, t0j
.endm
.macro TwoColumns i2_e, i3_e, i3_o
; column 2i
ld x0j, Y
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, \i2_e, \i3_e
st Y+, x0j
rol x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES - 1, x1j
; column 2i+1
ld x0j, Y
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, x2j, \i3_o
st Y+, x0j
rol x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES - 1, x1j
ldd t0j, Y + 2 * ROW_INBYTES + 1
std Y + 2 * ROW_INBYTES + 1, x2j
mov x2j, t0j
.endm
Permutation:
PUSH_CONFLICT
mov rcnt, rn
push rcnt
ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES)
ldd x20, Y + 0x00
ldd x22, Y + 0x02
ldd x24, Y + 0x04
ldd x26, Y + 0x06
ldd x28, Y + 0x08
ldd x2a, Y + 0x0a
ldd x2c, Y + 0x0c
ldd x2e, Y + 0x0e
adiw YL, ROW_INBYTES
ld x30, Y+
ld x31, Y+
ld x32, Y+
ld x33, Y+
ld x34, Y+
ld x35, Y+
ld x36, Y+
ld x37, Y+
ld x38, Y+
ld x39, Y+
ld x3a, Y+
ld x3b, Y+
ld x3c, Y+
ld x3d, Y+
ld x3e, Y+
ld x3f, Y+
#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[2] = 1 for HASH
rjmp For_Hash
For_AEAD:
ldi ZL, lo8(RC_LFSR7)
ldi ZH, hi8(RC_LFSR7)
rjmp round_loop_start
For_Hash:
ldi ZL, lo8(RC_LFSR8)
ldi ZH, hi8(RC_LFSR8)
#elif defined(CRYPTO_AEAD)
ldi ZL, lo8(RC_LFSR7)
ldi ZH, hi8(RC_LFSR7)
#else
ldi ZL, lo8(RC_LFSR8)
ldi ZH, hi8(RC_LFSR8)
#endif
round_loop_start:
; AddRC
lpm t0j, Z+
ldi YH, hi8(SRAM_STATE)
ldi YL, lo8(SRAM_STATE)
; column 0
ld x0j, Y
eor x0j, t0j
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, x20, x30
st Y+, x0j
lsl x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES - 1, x1j
; column 1
ld x0j, Y
ldd x1j, Y + ROW_INBYTES
ldd x2j, Y + 2 * ROW_INBYTES
Sbox x0j, x1j, x2j, x31
st Y+, x0j
rol x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES - 1, x1j
ldd t0j, Y + 2 * ROW_INBYTES + 1
std Y + 2 * ROW_INBYTES + 1, x2j
mov x2j, t0j
; column 2, 3
TwoColumns x22, x32, x33
; column 4, 5
TwoColumns x24, x34, x35
; column 6, 7
TwoColumns x26, x36, x37
; column 8, 9
TwoColumns x28, x38, x39
; column 10, 11
TwoColumns x2a, x3a, x3b
; column 12, 13
TwoColumns x2c, x3c, x3d
; column 14
ld x0j, Y
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, x2e, x3e
st Y+, x0j
rol x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES - 1, x1j
; column 15
ld x0j, Y
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, x2j, x3f
st Y+, x0j
rol x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES - 1, x1j
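; wrap-up after column 15: Y points back at row-1 byte 0, so the adc folds the
; final rotation carry into bit 0 of row 1 (completing row 1 <<< 1); x2j still
; holds the updated row-2 byte of column 15, which is stored two bytes ahead
; (row-2 byte 1) to complete the 16-bit left rotation of row 2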
ld x1j, Y
eor t0j, t0j
adc x1j, t0j
st Y, x1j
std Y + ROW_INBYTES + 1, x2j
; f e d c b a 9 8 7 6 5 4 3 2 1 0
; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x- 0
; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x' 0
; -- -- -- -- -- -- -- -- -- -- -- -- -- x- -- -- 2
; -- -- -- -- -- -- -- -- -- -- -- -- x' -- -- -- 3
; c b a 9 8 7 6 5 4 3 2 1 0 f e d
; x2e x2c x2a x28 x26 x24 x22 x20 => x2c x2a x28 x26 x24 x22 x20 x2e
;mov t0j, x2e
;mov x2e, x2c
;mov x2c, x2a
;mov x2a, x28
;mov x28, x26
;mov x26, x24
;mov x24, x22
;mov x22, x20
;mov x20, t0j
; an intentional arrangement of registers to facilitate movw
movw t0j, x26 ; t1j:t0j <= x2e:x26
movw x26, x24 ; x2e:x26 <= x2c:x24
movw x24, x22 ; x2c:x24 <= x2a:x22
movw x22, x20 ; x2a:x22 <= x28:x20
mov x20, t1j ; x20 <= t1j
mov x28, t0j ; x28 <= t0j
; <<< 1
mov t0j, x3f
rol t0j
rol x30
rol x31
rol x32
rol x33
rol x34
rol x35
rol x36
rol x37
rol x38
rol x39
rol x3a
rol x3b
rol x3c
rol x3d
rol x3e
rol x3f
; <<< 24
; f e d c b a 9 8 7 6 5 4 3 2 1 0 =>
; c b a 9 8 7 6 5 4 3 2 1 0 f e d
; mov x3j, x30
; mov x30, x3d
; mov x3d, x3a
; mov x3a, x37
; mov x37, x34
; mov x34, x31
; mov x31, x3e
; mov x3e, x3b
; mov x3b, x38
; mov x38, x35
; mov x35, x32
; mov x32, x3f
; mov x3f, x3c
; mov x3c, x39
; mov x39, x36
; mov x36, x33
; mov x33, x3j
; an intentional arrangement of registers to facilitate movw
; x30 r8
; x3d r10
; x3a r12
; x37 r14
; x34 r16
; x31 r18
; x3e r20
; x3b r22
; x38 r9
; x35 r11
; x32 r13
; x3f r15
; x3c r17
; x39 r19
; x36 r21
; x33 r23
movw t0j, x30 ; t1j:t0j <= x38:x30
movw x30, x3d ; x38:x30 <= x35:x3d
movw x3d, x3a ; x35:x3d <= x32:x3a
movw x3a, x37 ; x32:x3a <= x3f:x37
movw x37, x34 ; x3f:x37 <= x3c:x34
movw x34, x31 ; x3c:x34 <= x39:x31
movw x31, x3e ; x39:x31 <= x36:x3e
movw x3e, x3b ; x36:x3e <= x33:x3b
mov x3b, t1j ; x3b <= x38
mov x33, t0j ; x33 <= x30
pop rcnt
dec rcnt
push rcnt
breq round_loop_end
rjmp round_loop_start
round_loop_end:
pop rcnt
ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES)
std Y + 0x00, x20
std Y + 0x02, x22
std Y + 0x04, x24
std Y + 0x06, x26
std Y + 0x08, x28
std Y + 0x0a, x2a
std Y + 0x0c, x2c
std Y + 0x0e, x2e
adiw YL, ROW_INBYTES
st Y+, x30
st Y+, x31
st Y+, x32
st Y+, x33
st Y+, x34
st Y+, x35
st Y+, x36
st Y+, x37
st Y+, x38
st Y+, x39
st Y+, x3a
st Y+, x3b
st Y+, x3c
st Y+, x3d
st Y+, x3e
st Y+, x3f
POP_CONFLICT
ret
.section .text
#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
RC_LFSR7:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03
.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a
.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c
.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b
.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a
.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f
.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43
.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09
.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36
.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37
.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31
.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25
.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c
.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a
.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f
.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00
RC_LFSR8:
.byte 0x01, 0x02, 0x04, 0x08, 0x11, 0x23, 0x47, 0x8e
.byte 0x1c, 0x38, 0x71, 0xe2, 0xc4, 0x89, 0x12, 0x25
.byte 0x4b, 0x97, 0x2e, 0x5c, 0xb8, 0x70, 0xe0, 0xc0
.byte 0x81, 0x03, 0x06, 0x0c, 0x19, 0x32, 0x64, 0xc9
.byte 0x92, 0x24, 0x49, 0x93, 0x26, 0x4d, 0x9b, 0x37
.byte 0x6e, 0xdc, 0xb9, 0x72, 0xe4, 0xc8, 0x90, 0x20
.byte 0x41, 0x82, 0x05, 0x0a, 0x15, 0x2b, 0x56, 0xad
.byte 0x5b, 0xb6, 0x6d, 0xda, 0xb5, 0x6b, 0xd6, 0xac
.byte 0x59, 0xb2, 0x65, 0xcb, 0x96, 0x2c, 0x58, 0xb0
.byte 0x61, 0xc3, 0x87, 0x0f, 0x1f, 0x3e, 0x7d, 0xfb
.byte 0xf6, 0xed, 0xdb, 0xb7, 0x6f, 0xde, 0xbd, 0x7a
.byte 0xf5, 0xeb, 0xd7, 0xae, 0x5d, 0xba, 0x74, 0xe8
.byte 0xd1, 0xa2, 0x44, 0x88, 0x10, 0x21, 0x43, 0x86
.byte 0x0d, 0x1b, 0x36, 0x6c, 0xd8, 0xb1, 0x63, 0xc7
.byte 0x8f, 0x1e, 0x3c, 0x79, 0xf3, 0xe7, 0xce, 0x9c
.byte 0x39, 0x73, 0xe6, 0xcc, 0x98, 0x31, 0x62, 0xc5
.byte 0x8b, 0x16, 0x2d, 0x5a, 0xb4, 0x69, 0xd2, 0xa4
.byte 0x48, 0x91, 0x22, 0x45, 0x8a, 0x14, 0x29, 0x52
.byte 0xa5, 0x4a, 0x95, 0x2a, 0x54, 0xa9, 0x53, 0xa7
.byte 0x4e, 0x9d, 0x3b, 0x77, 0xee, 0xdd, 0xbb, 0x76
.byte 0xec, 0xd9, 0xb3, 0x67, 0xcf, 0x9e, 0x3d, 0x7b
.byte 0xf7, 0xef, 0xdf, 0xbf, 0x7e, 0xfd, 0xfa, 0xf4
.byte 0xe9, 0xd3, 0xa6, 0x4c, 0x99, 0x33, 0x66, 0xcd
.byte 0x9a, 0x35, 0x6a, 0xd4, 0xa8, 0x51, 0xa3, 0x46
.byte 0x8c, 0x18, 0x30, 0x60, 0xc1, 0x83, 0x07, 0x0e
.byte 0x1d, 0x3a, 0x75, 0xea, 0xd5, 0xaa, 0x55, 0xab
.byte 0x57, 0xaf, 0x5f, 0xbe, 0x7c, 0xf9, 0xf2, 0xe5
.byte 0xca, 0x94, 0x28, 0x50, 0xa1, 0x42, 0x84, 0x09
.byte 0x13, 0x27, 0x4f, 0x9f, 0x3f, 0x7f, 0xff, 0xfe
.byte 0xfc, 0xf8, 0xf0, 0xe1, 0xc2, 0x85, 0x0b, 0x17
.byte 0x2f, 0x5e, 0xbc, 0x78, 0xf1, 0xe3, 0xc6, 0x8d
.byte 0x1a, 0x34, 0x68, 0xd0, 0xa0, 0x40, 0x80, 0x00
#elif defined(CRYPTO_AEAD)
RC_LFSR7:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03
.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a
.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c
.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b
.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a
.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f
.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43
.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09
.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36
.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37
.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31
.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25
.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c
.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a
.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f
.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00
#else
RC_LFSR8:
.byte 0x01, 0x02, 0x04, 0x08, 0x11, 0x23, 0x47, 0x8e
.byte 0x1c, 0x38, 0x71, 0xe2, 0xc4, 0x89, 0x12, 0x25
.byte 0x4b, 0x97, 0x2e, 0x5c, 0xb8, 0x70, 0xe0, 0xc0
.byte 0x81, 0x03, 0x06, 0x0c, 0x19, 0x32, 0x64, 0xc9
.byte 0x92, 0x24, 0x49, 0x93, 0x26, 0x4d, 0x9b, 0x37
.byte 0x6e, 0xdc, 0xb9, 0x72, 0xe4, 0xc8, 0x90, 0x20
.byte 0x41, 0x82, 0x05, 0x0a, 0x15, 0x2b, 0x56, 0xad
.byte 0x5b, 0xb6, 0x6d, 0xda, 0xb5, 0x6b, 0xd6, 0xac
.byte 0x59, 0xb2, 0x65, 0xcb, 0x96, 0x2c, 0x58, 0xb0
.byte 0x61, 0xc3, 0x87, 0x0f, 0x1f, 0x3e, 0x7d, 0xfb
.byte 0xf6, 0xed, 0xdb, 0xb7, 0x6f, 0xde, 0xbd, 0x7a
.byte 0xf5, 0xeb, 0xd7, 0xae, 0x5d, 0xba, 0x74, 0xe8
.byte 0xd1, 0xa2, 0x44, 0x88, 0x10, 0x21, 0x43, 0x86
.byte 0x0d, 0x1b, 0x36, 0x6c, 0xd8, 0xb1, 0x63, 0xc7
.byte 0x8f, 0x1e, 0x3c, 0x79, 0xf3, 0xe7, 0xce, 0x9c
.byte 0x39, 0x73, 0xe6, 0xcc, 0x98, 0x31, 0x62, 0xc5
.byte 0x8b, 0x16, 0x2d, 0x5a, 0xb4, 0x69, 0xd2, 0xa4
.byte 0x48, 0x91, 0x22, 0x45, 0x8a, 0x14, 0x29, 0x52
.byte 0xa5, 0x4a, 0x95, 0x2a, 0x54, 0xa9, 0x53, 0xa7
.byte 0x4e, 0x9d, 0x3b, 0x77, 0xee, 0xdd, 0xbb, 0x76
.byte 0xec, 0xd9, 0xb3, 0x67, 0xcf, 0x9e, 0x3d, 0x7b
.byte 0xf7, 0xef, 0xdf, 0xbf, 0x7e, 0xfd, 0xfa, 0xf4
.byte 0xe9, 0xd3, 0xa6, 0x4c, 0x99, 0x33, 0x66, 0xcd
.byte 0x9a, 0x35, 0x6a, 0xd4, 0xa8, 0x51, 0xa3, 0x46
.byte 0x8c, 0x18, 0x30, 0x60, 0xc1, 0x83, 0x07, 0x0e
.byte 0x1d, 0x3a, 0x75, 0xea, 0xd5, 0xaa, 0x55, 0xab
.byte 0x57, 0xaf, 0x5f, 0xbe, 0x7c, 0xf9, 0xf2, 0xe5
.byte 0xca, 0x94, 0x28, 0x50, 0xa1, 0x42, 0x84, 0x09
.byte 0x13, 0x27, 0x4f, 0x9f, 0x3f, 0x7f, 0xff, 0xfe
.byte 0xfc, 0xf8, 0xf0, 0xe1, 0xc2, 0x85, 0x0b, 0x17
.byte 0x2f, 0x5e, 0xbc, 0x78, 0xf1, 0xe3, 0xc6, 0x8d
.byte 0x1a, 0x34, 0x68, 0xd0, 0xa0, 0x40, 0x80, 0x00
#endif
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
;
; ============================================
; R E G I S T E R D E F I N I T I O N S
; ============================================
;
#define mclen r16
#define radlen r17
#define tcnt r17
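; note: radlen and tcnt intentionally share r17 -- radlen is only needed while
; absorbing and tcnt only while squeezing, so their lifetimes never overlap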
#define tmp0 r20
#define tmp1 r21
#define cnt0 r22
#define rn r23
#define rate r24
; AEDH = 0b000: for authenticate AD
; AEDH = 0b001: for encryption
; AEDH = 0b011: for decryption
; AEDH = 0b100: for hash
#define AEDH r25
#define rcnt r26
#if (STATE_INBITS==256)
#include "knot256.h"
#elif (STATE_INBITS==384)
#include "knot384.h"
#elif (STATE_INBITS==512)
#include "knot512.h"
#else
#error "Not specified state size"
#endif
#define CRYPTO_KEYBYTES 24
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 24
#define CRYPTO_ABYTES 24
#define CRYPTO_NOOVERLAP 1
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
.macro PUSH_CONFLICT
push r16
push r17
push r18
push r19
push r23
push r24
push r25
push r26
push r27
push r28
push r29
push r30
push r31
.endm
.macro POP_CONFLICT
pop r31
pop r30
pop r29
pop r28
pop r27
pop r26
pop r25
pop r24
pop r23
pop r19
pop r18
pop r17
pop r16
.endm
.macro PUSH_ALL
push r2
push r3
push r4
push r5
push r6
push r7
push r8
push r9
push r10
push r11
push r12
push r13
push r14
push r15
push r16
push r17
push r28
push r29
.endm
.macro POP_ALL
pop r29
pop r28
pop r17
pop r16
pop r15
pop r14
pop r13
pop r12
pop r11
pop r10
pop r9
pop r8
pop r7
pop r6
pop r5
pop r4
pop r3
pop r2
clr r1
.endm
\ No newline at end of file
#ifndef __CONFIG_H__
#define __CONFIG_H__
#define CRYPTO_AEAD
//#define CRYPTO_HASH
#define MAX_MESSAGE_LENGTH 128
#define STATE_INBITS 384
/* For CRYPTO_AEAD */
#define CRYPTO_KEYBITS 192
/* For CRYPTO_HASH */
#define CRYPTO_BITS 384
#define STATE_INBYTES ((STATE_INBITS + 7) / 8)
#define ROW_INBITS ((STATE_INBITS + 3) / 4)
#define ROW_INBYTES ((ROW_INBITS + 7) / 8)
/* For CRYPTO_AEAD */
#define CRYPTO_KEYBYTES ((CRYPTO_KEYBITS + 7) / 8)
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES CRYPTO_KEYBYTES
#define CRYPTO_ABYTES CRYPTO_KEYBYTES
#define CRYPTO_NOOVERLAP 1
#define MAX_ASSOCIATED_DATA_LENGTH 32
#define MAX_CIPHER_LENGTH (MAX_MESSAGE_LENGTH + CRYPTO_ABYTES)
#define TAG_MATCH 0
#define TAG_UNMATCH -1
#define OTHER_FAILURES -2
/* For CRYPTO_HASH */
#define CRYPTO_BYTES ((CRYPTO_BITS + 7) / 8)
#define DOMAIN_BITS 0x80
#define PAD_BITS 0x01
#define S384_R192_BITS 0x80
#if (STATE_INBITS==256)
#define C1 1
#define C2 8
#define C3 25
#elif (STATE_INBITS==384)
#define C1 1
#define C2 8
#define C3 55
#elif (STATE_INBITS==512)
#define C1 1
#define C2 16
#define C3 25
#else
#error "Not specified state size"
#endif
#ifdef CRYPTO_AEAD
/* For CRYPTO_AEAD */
#define KEY_INBITS (CRYPTO_KEYBYTES * 8)
#define KEY_INBYTES (CRYPTO_KEYBYTES)
#define NONCE_INBITS (CRYPTO_NPUBBYTES * 8)
#define NONCE_INBYTES (CRYPTO_NPUBBYTES)
#define TAG_INBITS (CRYPTO_ABYTES * 8)
#define TAG_INBYTES (CRYPTO_ABYTES)
#if (KEY_INBITS==128) && (STATE_INBITS==256)
#define RATE_INBITS 64
#define NR_0 52
#define NR_i 28
#define NR_f 32
#elif (KEY_INBITS==128) && (STATE_INBITS==384)
#define RATE_INBITS 192
#define NR_0 76
#define NR_i 28
#define NR_f 32
#elif (KEY_INBITS==192) && (STATE_INBITS==384)
#define RATE_INBITS 96
#define NR_0 76
#define NR_i 40
#define NR_f 44
#elif (KEY_INBITS==256) && (STATE_INBITS==512)
#define RATE_INBITS 128
#define NR_0 100
#define NR_i 52
#define NR_f 56
#else
#error "Not specified key size and state size"
#endif
#define RATE_INBYTES ((RATE_INBITS + 7) / 8)
#define SQUEEZE_RATE_INBYTES TAG_INBYTES
#endif
#ifdef CRYPTO_HASH
/* For CRYPTO_HASH */
#define HASH_DIGEST_INBITS (CRYPTO_BYTES * 8)
#if (HASH_DIGEST_INBITS==256) && (STATE_INBITS==256)
#define HASH_RATE_INBITS 32
#define HASH_SQUEEZE_RATE_INBITS 128
#define NR_h 68
#elif (HASH_DIGEST_INBITS==256) && (STATE_INBITS==384)
#define HASH_RATE_INBITS 128
#define HASH_SQUEEZE_RATE_INBITS 128
#define NR_h 80
#elif (HASH_DIGEST_INBITS==384) && (STATE_INBITS==384)
#define HASH_RATE_INBITS 48
#define HASH_SQUEEZE_RATE_INBITS 192
#define NR_h 104
#elif (HASH_DIGEST_INBITS==512) && (STATE_INBITS==512)
#define HASH_RATE_INBITS 64
#define HASH_SQUEEZE_RATE_INBITS 256
#define NR_h 140
#else
#error "Not specified hash digest size and state size"
#endif
#define HASH_RATE_INBYTES ((HASH_RATE_INBITS + 7) / 8)
#define HASH_SQUEEZE_RATE_INBYTES ((HASH_SQUEEZE_RATE_INBITS + 7) / 8)
#endif
#define TAG_MATCH 0
#define TAG_UNMATCH -1
#define OTHER_FAILURES -2
#endif
\ No newline at end of file
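/*
 * Sanity-check sketch (illustrative, not part of the build): with the settings
 * above (STATE_INBITS = 384, CRYPTO_KEYBITS = 192) the derived macros resolve
 * to the KNOT-AEAD(192,384,96) parameters.
 */
#include "config.h"
_Static_assert(STATE_INBYTES == 48, "48-byte state");
_Static_assert(ROW_INBYTES == 12, "four 12-byte rows");
_Static_assert(CRYPTO_KEYBYTES == 24 && CRYPTO_NPUBBYTES == 24 && CRYPTO_ABYTES == 24,
               "24-byte key, nonce and tag");
_Static_assert(RATE_INBYTES == 12, "96-bit rate");
_Static_assert(NR_0 == 76 && NR_i == 40 && NR_f == 44, "round counts");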
#ifdef __cplusplus
extern "C" {
#endif
int crypto_aead_encrypt(
unsigned char *c,unsigned long long *clen,
const unsigned char *m,unsigned long long mlen,
const unsigned char *ad,unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
);
int crypto_aead_decrypt(
unsigned char *m,unsigned long long *outputmlen,
unsigned char *nsec,
const unsigned char *c,unsigned long long clen,
const unsigned char *ad,unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
);
#ifdef __cplusplus
}
#endif
#include <avr/io.h>
#include <avr/sfr_defs.h>
#include <stdlib.h>
#include <string.h>
#include "config.h"
extern void crypto_aead_encrypt_asm(
unsigned char *c,
const unsigned char *m,
unsigned char mlen,
const unsigned char *ad,
unsigned char adlen,
const unsigned char *npub,
const unsigned char *k
);
extern int crypto_aead_decrypt_asm(
unsigned char *m,
const unsigned char *c,
unsigned char clen,
const unsigned char *ad,
unsigned char adlen,
const unsigned char *npub,
const unsigned char *k
);
extern void crypto_hash_asm(
unsigned char *out,
const unsigned char *in,
unsigned char inlen
);
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
)
{
/*
...
... the code for the cipher implementation goes here,
... generating a ciphertext c[0],c[1],...,c[*clen-1]
... from a plaintext m[0],m[1],...,m[mlen-1]
... and associated data ad[0],ad[1],...,ad[adlen-1]
... and nonce npub[0],npub[1],..
... and secret key k[0],k[1],...
... the implementation shall not use nsec
...
... return 0;
*/
(void)nsec;
crypto_aead_encrypt_asm(c, m, mlen, ad, adlen, npub, k);
*clen = mlen + TAG_INBYTES;
return 0;
}
int crypto_aead_decrypt(
unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
)
{
/*
...
... the code for the AEAD implementation goes here,
... generating a plaintext m[0],m[1],...,m[*mlen-1]
... and secret message number nsec[0],nsec[1],...
... from a ciphertext c[0],c[1],...,c[clen-1]
... and associated data ad[0],ad[1],...,ad[adlen-1]
... and nonce number npub[0],npub[1],...
... and secret key k[0],k[1],...
...
... return 0;
*/
unsigned long long mlen_;
unsigned char tag_is_match;
(void)nsec;
if (clen < CRYPTO_ABYTES) {
return -1;
}
mlen_ = clen - CRYPTO_ABYTES;
tag_is_match = crypto_aead_decrypt_asm(m, c, mlen_, ad, adlen, npub, k);
if (tag_is_match != 0)
{
memset(m, 0, (size_t)mlen_);
return -1;
}
*mlen = mlen_;
return 0;
}
\ No newline at end of file
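/*
 * Minimal usage sketch for the AEAD API declared above (hypothetical demo code,
 * not part of this code base). The header names in the includes are assumptions;
 * buffer sizes follow the CRYPTO_* macros of the configured variant.
 */
#include <string.h>
#include "api.h"          /* assumed to provide CRYPTO_KEYBYTES, CRYPTO_NPUBBYTES, CRYPTO_ABYTES */
#include "crypto_aead.h"  /* assumed name of the header with the two prototypes above */

int aead_roundtrip_demo(void)
{
    unsigned char key[CRYPTO_KEYBYTES]    = {0};
    unsigned char nonce[CRYPTO_NPUBBYTES] = {0};
    unsigned char ad[4]   = {0xDE, 0xAD, 0xBE, 0xEF};
    unsigned char msg[16] = "KNOT on AVR8!!!";
    unsigned char ct[sizeof(msg) + CRYPTO_ABYTES];
    unsigned char pt[sizeof(msg)];
    unsigned long long ctlen = 0, ptlen = 0;

    crypto_aead_encrypt(ct, &ctlen, msg, sizeof(msg), ad, sizeof(ad), NULL, nonce, key);

    /* returns 0 and fills pt only if the tag verifies */
    if (crypto_aead_decrypt(pt, &ptlen, NULL, ct, ctlen, ad, sizeof(ad), nonce, key) != 0)
        return -1;
    return memcmp(pt, msg, sizeof(msg)) == 0 ? 0 : -1;
}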
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
;
; ============================================
; S R A M D E F I N I T I O N S
; ============================================
;
#include <avr/io.h>
#include "config.h"
.section .noinit
SRAM_STATE: .BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
#if (STATE_INBYTES > 32)
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
#endif
#if (STATE_INBYTES > 48)
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
#endif
SRAM_MESSAGE_OUT_ADDR: .BYTE 0, 0
SRAM_MESSAGE_IN_ADDR: .BYTE 0, 0
SRAM_MESSAGE_IN_LEN: .BYTE 0, 0
#ifdef CRYPTO_AEAD
; For CRYPTO_AEAD
SRAM_ASSOCIATED_DATA_ADDR: .BYTE 0, 0
SRAM_ADLEN: .BYTE 0, 0
SRAM_NONCE_ADDR: .BYTE 0, 0
SRAM_KEY_ADDR: .BYTE 0, 0
SRAM_ADDITIONAL:
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
#if (CRYPTO_ABYTES > 16)
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
#endif
#if (CRYPTO_ABYTES > 24)
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
#endif
#endif
.section .text
#include "permutation.h"
; requires YH:YL to hold the address of the current associated data/ciphertext/message block
; for enc and dec, the resulting ciphertext or plaintext is stored via Z
; requires ZH:ZL to hold the address of the current ciphertext/message output block
.macro XOR_to_State_ENCDEC
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
mov cnt0, rate
XOR_to_State_loop_ENCDEC:
ld tmp0, Y+ ; plaintext/ciphertext
ld tmp1, X ; state
eor tmp1, tmp0 ; ciphertext/plaintext
st Z+, tmp1 ; store ciphertext/plaintext
sbrc AEDH, 1 ; test auth/enc vs dec; if AEDH[1] == 0, skip replacing the state byte
mov tmp1, tmp0 ; if dec, replace state
st X+, tmp1 ; store state byte
dec cnt0
brne XOR_to_State_loop_ENCDEC
; YH:YL are now the address of the next associated data block
.endm
; requires YH:YL to hold the address of the current associated data block
; absorb only: the block is XORed into the state, no output is written via Z
.macro XOR_to_State_AUTH
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
mov cnt0, rate
XOR_to_State_loop_AUTH:
ld tmp0, Y+ ; plaintext/ciphertext
ld tmp1, X ; state
eor tmp1, tmp0 ; ciphertext/plaintext
st X+, tmp1 ; store state byte
dec cnt0
brne XOR_to_State_loop_AUTH
; YH:YL are now the address of the next associated data block
.endm
; requires YH:YL to point to the input data
; requires ZH:ZL to point to the output data
; requires cnt0 to contain the number of bytes of source data
; requires the number of source bytes to be less than the rate, i.e., 0 <= cnt0 < rate
;
; bit 0 of AEDH distinguishes (auth AD) from (enc/dec M/C):
; AEDH[0] = 0 for (auth AD), AEDH[0] = 1 for (enc/dec M/C)
; bit 1 of AEDH distinguishes (auth AD/enc M) from (dec C):
; AEDH[1] = 0 for (auth AD/enc M), AEDH[1] = 1 for (dec C)
; AEDH = 0b000 for (auth AD)
; AEDH = 0b001 for (enc M)
; AEDH = 0b011 for (dec C)
Pad_XOR_to_State:
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
tst cnt0
breq XOR_padded_data
XOR_source_data_loop:
ld tmp0, Y+ ; plaintext/ciphertext
ld tmp1, X ; state
eor tmp1, tmp0 ; ciphertext/plaintext
sbrc AEDH, 0 ; test auth vs enc/dec; if AEDH[0] == 0, skip storing the result
st Z+, tmp1 ; store ciphertext/plaintext
sbrc AEDH, 1 ; test auth/enc vs dec; if AEDH[1] == 0, skip replacing the state byte
mov tmp1, tmp0 ; if dec, replace state
st X+, tmp1 ; store state byte
dec cnt0
brne XOR_source_data_loop
XOR_padded_data:
ldi tmp0, PAD_BITS
ld tmp1, X
eor tmp1, tmp0
st X, tmp1
ret
AddDomain:
ldi XH, hi8(SRAM_STATE + STATE_INBYTES - 1)
ldi XL, lo8(SRAM_STATE + STATE_INBYTES - 1)
ldi tmp0, DOMAIN_BITS
ld tmp1, X
eor tmp0, tmp1
st X, tmp0
ret
; require ZH:ZL be the address of the destination
EXTRACT_from_State:
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
mov tmp1, rate
EXTRACT_from_State_loop:
ld tmp0, X+
st Z+, tmp0
dec tmp1
brne EXTRACT_from_State_loop
ret
AUTH:
tst radlen
breq AUTH_end
cp radlen, rate
brlo auth_ad_padded_block
auth_ad_loop:
XOR_to_State_AUTH
rcall Permutation
sub radlen, rate
cp radlen, rate
brlo auth_ad_padded_block
rjmp auth_ad_loop
auth_ad_padded_block:
mov cnt0, radlen
rcall Pad_XOR_to_State
rcall Permutation
AUTH_end:
ret
#ifdef CRYPTO_AEAD
Initialization:
ldi rn, NR_0
ldi XL, lo8(SRAM_STATE)
ldi XH, hi8(SRAM_STATE)
lds YH, SRAM_NONCE_ADDR
lds YL, SRAM_NONCE_ADDR + 1
ldi cnt0, CRYPTO_NPUBBYTES
load_nonce_loop:
ld tmp0, Y+
st X+, tmp0
dec cnt0
brne load_nonce_loop
lds YH, SRAM_KEY_ADDR
lds YL, SRAM_KEY_ADDR + 1
ldi cnt0, CRYPTO_KEYBYTES
load_key_loop:
ld tmp0, Y+
st X+, tmp0
dec cnt0
brne load_key_loop
#if (STATE_INBITS==384) && (RATE_INBITS==192)
ldi cnt0, (STATE_INBYTES - CRYPTO_NPUBBYTES - CRYPTO_KEYBYTES - 1)
clr tmp0
empty_state_loop:
st X+, tmp0
dec cnt0
brne empty_state_loop
ldi tmp0, S384_R192_BITS
st X+, tmp0
#endif
rcall Permutation
ret
ENC:
tst mclen
breq ENC_end
cp mclen, rate
brlo enc_padded_block
enc_loop:
XOR_to_State_ENCDEC
ldi rn, NR_i
rcall Permutation
sub mclen, rate
cp mclen, rate
brlo enc_padded_block
rjmp enc_loop
enc_padded_block:
mov cnt0, mclen
rcall Pad_XOR_to_State
ENC_end:
ret
Finalization:
ldi rate, SQUEEZE_RATE_INBYTES
ldi rn, NR_f
rcall Permutation
rcall EXTRACT_from_State
ret
; void crypto_aead_encrypt_asm(
; unsigned char *c,
; const unsigned char *m,
; unsigned long long mlen,
; const unsigned char *ad,
; unsigned long long adlen,
; const unsigned char *npub,
; const unsigned char *k
; )
;
; unsigned char *c, is passed in r24:r25
; const unsigned char *m, is passed in r22:r23
; unsigned long long mlen, is passed in r20:r21, only LSB (r20) is used
; const unsigned char *ad, is passed in r18:r19
; unsigned long long adlen, is passed in r16:r17, only LSB (r16) is used
; const unsigned char *npub, is passed in r14:r15
; const unsigned char *k is passed in r12:r13
.global crypto_aead_encrypt_asm
crypto_aead_encrypt_asm:
PUSH_ALL
ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
st X+, r25 ;store cipher address in SRAM_MESSAGE_OUT_ADDR
st X+, r24
st X+, r23 ;store message address in SRAM_MESSAGE_IN_ADDR
st X+, r22
st X+, r21 ;store message length in SRAM_MESSAGE_IN_LEN
st X+, r20
st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR
st X+, r18
st X+, r17 ;store associated data length in SRAM_ADLEN
st X+, r16
st X+, r15 ;store nonce address in SRAM_NONCE_ADDR
st X+, r14
st X+, r13 ;store key address in SRAM_KEY_ADDR
st X+, r12
mov radlen, r16
mov mclen, r20
rcall Initialization
ldi rn, NR_i
ldi rate, RATE_INBYTES
ldi AEDH, 0b000 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
lds YH, SRAM_ASSOCIATED_DATA_ADDR
lds YL, SRAM_ASSOCIATED_DATA_ADDR + 1
rcall AUTH
rcall AddDomain
ldi AEDH, 0b001 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
lds YH, SRAM_MESSAGE_IN_ADDR
lds YL, SRAM_MESSAGE_IN_ADDR + 1
lds ZH, SRAM_MESSAGE_OUT_ADDR
lds ZL, SRAM_MESSAGE_OUT_ADDR + 1
rcall ENC
rcall Finalization
POP_ALL
ret
; int crypto_aead_decrypt_asm(
; unsigned char *m,
; const unsigned char *c,
; unsigned long long clen,
; const unsigned char *ad,
; unsigned long long adlen,
; const unsigned char *npub,
; const unsigned char *k
; )
;
; unsigned char *m, is passed in r24:r25
; const unsigned char *c, is passed in r22:r23
; unsigned long long clen, is passed in r20:r21, only LSB (r20) is used
; const unsigned char *ad, is passed in r18:r19
; unsigned long long adlen, is passed in r16:r17, only LSB (r16) is used
; const unsigned char *npub, is passed in r14:r15
; const unsigned char *k is passed in r12:r13
.global crypto_aead_decrypt_asm
crypto_aead_decrypt_asm:
PUSH_ALL
ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
st X+, r25 ;store message address in SRAM_MESSAGE_OUT_ADDR
st X+, r24
st X+, r23 ;store cipher address in SRAM_MESSAGE_IN_ADDR
st X+, r22
st X+, r21 ;store cipher length in SRAM_MESSAGE_IN_LEN
st X+, r20
st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR
st X+, r18
st X+, r17 ;store associated data length in SRAM_ADLEN
st X+, r16
st X+, r15 ;store nonce address in SRAM_NONCE_ADDR
st X+, r14
st X+, r13 ;store key address in SRAM_KEY_ADDR
st X+, r12
mov radlen, r16
mov mclen, r20
rcall Initialization
ldi rn, NR_i
ldi rate, RATE_INBYTES
ldi AEDH, 0b000 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
lds YH, SRAM_ASSOCIATED_DATA_ADDR
lds YL, SRAM_ASSOCIATED_DATA_ADDR + 1
rcall AUTH
rcall AddDomain
ldi AEDH, 0b011 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
lds YH, SRAM_MESSAGE_IN_ADDR
lds YL, SRAM_MESSAGE_IN_ADDR + 1
lds ZH, SRAM_MESSAGE_OUT_ADDR
lds ZL, SRAM_MESSAGE_OUT_ADDR + 1
rcall ENC
ldi ZH, hi8(SRAM_ADDITIONAL)
ldi ZL, lo8(SRAM_ADDITIONAL)
rcall Finalization
sbiw ZL, CRYPTO_ABYTES
ldi cnt0, CRYPTO_ABYTES
compare_tag:
ld tmp0, Z+
ld tmp1, Y+
cp tmp0, tmp1
brne return_tag_not_match
dec cnt0
brne compare_tag
rjmp return_tag_match
return_tag_not_match:
ldi r25, 0xFF
ldi r24, 0xFF
rjmp crypto_aead_decrypt_end
return_tag_match:
clr r25
clr r24
crypto_aead_decrypt_end:
POP_ALL
ret
; #ifdef CRYPTO_AEAD
#endif
#ifdef CRYPTO_HASH
; void crypto_hash_asm(
; unsigned char *out,
; const unsigned char *in,
; unsigned long long inlen
; )
;
; unsigned char *out, is passed in r24:r25
; const unsigned char *in, is passed in r22:r23
; unsigned long long inlen, is passed in r20:r21, only LSB (r20) is used
.global crypto_hash_asm
crypto_hash_asm:
PUSH_ALL
ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
st X+, r25 ;store message address in SRAM_MESSAGE_OUT_ADDR
st X+, r24
st X+, r23 ;store cipher address in SRAM_MESSAGE_IN_ADDR
st X+, r22
st X+, r21 ;store cipher length in SRAM_MESSAGE_IN_LEN
st X+, r20
mov mclen, r20
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
#if (STATE_INBITS==384) && (HASH_RATE_INBITS==128)
ldi cnt0, STATE_INBYTES - 1
#else
ldi cnt0, STATE_INBYTES
#endif
clr tmp0
zero_state:
st X+, tmp0
dec cnt0
brne zero_state
#if (STATE_INBITS==384) && (HASH_RATE_INBITS==128)
ldi tmp0, S384_R192_BITS
st X+, tmp0
#endif
ldi rn, NR_h
ldi AEDH, 0b100
HASH_ABSORBING:
mov radlen, mclen
tst radlen
breq EMPTY_M
ldi rate, HASH_RATE_INBYTES
lds YH, SRAM_MESSAGE_IN_ADDR
lds YL, SRAM_MESSAGE_IN_ADDR + 1
rcall AUTH
rjmp HASH_SQUEEZING
EMPTY_M:
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
ldi tmp0, PAD_BITS
ld tmp1, X
eor tmp1, tmp0
st X, tmp1
rcall Permutation
HASH_SQUEEZING:
ldi rate, HASH_SQUEEZE_RATE_INBYTES
lds ZH, SRAM_MESSAGE_OUT_ADDR
lds ZL, SRAM_MESSAGE_OUT_ADDR + 1
ldi tcnt, CRYPTO_BYTES
SQUEEZING_loop:
rcall EXTRACT_from_State
subi tcnt, HASH_SQUEEZE_RATE_INBYTES
breq HASH_SQUEEZING_end
rcall Permutation
rjmp SQUEEZING_loop
HASH_SQUEEZING_end:
POP_ALL
ret
#endif
; Byte Order In AVR 8:
; KNOT-AEAD(128, 256, 64):
; N[ 0] AEAD_State[ 0] | Message[ 0] Perm_row_0[0] 0 Tag[ 0]
; N[ 1] AEAD_State[ 1] | Message[ 1] Perm_row_0[1] 0 Tag[ 1]
; N[ 2] AEAD_State[ 2] | Message[ 2] Perm_row_0[2] 0 Tag[ 2]
; N[ 3] AEAD_State[ 3] | Message[ 3] Perm_row_0[3] 0 Tag[ 3]
; N[ 4] AEAD_State[ 4] | Message[ 4] 0x01 Perm_row_0[4] 0 Tag[ 4]
; N[ 5] AEAD_State[ 5] | Message[ 5] 0x00 Perm_row_0[5] 0 Tag[ 5]
; N[ 6] AEAD_State[ 6] | Message[ 6] 0x00 Perm_row_0[6] 0 Tag[ 6]
; N[ 7] AEAD_State[ 7] | Message[ 7] 0x00 Perm_row_0[7] <<< 0 Tag[ 7]
; N[ 8] AEAD_State[ 8] | Perm_row_1[0] 1
; N[ 9] AEAD_State[ 9] | Perm_row_1[1] 1
; N[10] AEAD_State[10] | Perm_row_1[2] 1
; N[11] AEAD_State[11] | Perm_row_1[3] 1
; N[12] AEAD_State[12] | Perm_row_1[4] 1
; N[13] AEAD_State[13] | Perm_row_1[5] 1
; N[14] AEAD_State[14] | Perm_row_1[6] 1
; N[15] AEAD_State[15] | Perm_row_1[7] <<< 1
; K[ 0] AEAD_State[16] | Perm_row_2[0] 8
; K[ 1] AEAD_State[17] | Perm_row_2[1] 8
; K[ 2] AEAD_State[18] | Perm_row_2[2] 8
; K[ 3] AEAD_State[19] | Perm_row_2[3] 8
; K[ 4] AEAD_State[20] | Perm_row_2[4] 8
; K[ 5] AEAD_State[21] | Perm_row_2[5] 8
; K[ 6] AEAD_State[22] | Perm_row_2[6] 8
; K[ 7] AEAD_State[23] | Perm_row_2[7] <<< 8
; K[ 8] AEAD_State[24] | Perm_row_3[0] 25
; K[ 9] AEAD_State[25] | Perm_row_3[1] 25
; K[10] AEAD_State[26] | Perm_row_3[2] 25
; K[11] AEAD_State[27] | Perm_row_3[3] 25
; K[12] AEAD_State[28] | Perm_row_3[4] 25
; K[13] AEAD_State[29] | Perm_row_3[5] 25
; K[14] AEAD_State[30] | Perm_row_3[6] 25
; K[15] AEAD_State[31] | ^0x80 Perm_row_3[7] <<< 25
;
;
; KNOT-AEAD(128, 384, 192):
; Initialization
; N[ 0] AEAD_State[ 0] | Message[ 0] Perm_row_0[ 0] 0 Tag[ 0]
; N[ 1] AEAD_State[ 1] | Message[ 1] Perm_row_0[ 1] 0 Tag[ 1]
; N[ 2] AEAD_State[ 2] | Message[ 2] Perm_row_0[ 2] 0 Tag[ 2]
; N[ 3] AEAD_State[ 3] | Message[ 3] Perm_row_0[ 3] 0 Tag[ 3]
; N[ 4] AEAD_State[ 4] | Message[ 4] 0x01 Perm_row_0[ 4] 0 Tag[ 4]
; N[ 5] AEAD_State[ 5] | Message[ 5] 0x00 Perm_row_0[ 5] 0 Tag[ 5]
; N[ 6] AEAD_State[ 6] | Message[ 6] 0x00 Perm_row_0[ 6] 0 Tag[ 6]
; N[ 7] AEAD_State[ 7] | Message[ 7] 0x00 Perm_row_0[ 7] 0 Tag[ 7]
; N[ 8] AEAD_State[ 8] | Message[ 8] 0x00 Perm_row_0[ 8] 0 Tag[ 8]
; N[ 9] AEAD_State[ 9] | Message[ 9] 0x00 Perm_row_0[ 9] 0 Tag[ 9]
; N[10] AEAD_State[10] | Message[10] 0x00 Perm_row_0[10] 0 Tag[10]
; N[11] AEAD_State[11] | Message[11] 0x00 Perm_row_0[11] <<< 0 Tag[11]
; N[12] AEAD_State[12] | Message[12] 0x00 Perm_row_1[ 0] 1 Tag[12]
; N[13] AEAD_State[13] | Message[13] 0x00 Perm_row_1[ 1] 1 Tag[13]
; N[14] AEAD_State[14] | Message[14] 0x00 Perm_row_1[ 2] 1 Tag[14]
; N[15] AEAD_State[15] | Message[15] 0x00 Perm_row_1[ 3] 1 Tag[15]
; K[ 0] AEAD_State[16] | Message[16] 0x00 Perm_row_1[ 4] 1
; K[ 1] AEAD_State[17] | Message[17] 0x00 Perm_row_1[ 5] 1
; K[ 2] AEAD_State[18] | Message[18] 0x00 Perm_row_1[ 6] 1
; K[ 3] AEAD_State[19] | Message[19] 0x00 Perm_row_1[ 7] 1
; K[ 4] AEAD_State[20] | Message[20] 0x00 Perm_row_1[ 8] 1
; K[ 5] AEAD_State[21] | Message[21] 0x00 Perm_row_1[ 9] 1
; K[ 6] AEAD_State[22] | Message[22] 0x00 Perm_row_1[10] 1
; K[ 7] AEAD_State[23] | Message[23] 0x00 Perm_row_1[11] <<< 1
; K[ 8] AEAD_State[24] | Perm_row_2[ 0] 8
; K[ 9] AEAD_State[25] | Perm_row_2[ 1] 8
; K[10] AEAD_State[26] | Perm_row_2[ 2] 8
; K[11] AEAD_State[27] | Perm_row_2[ 3] 8
; K[12] AEAD_State[28] | Perm_row_2[ 4] 8
; K[13] AEAD_State[29] | Perm_row_2[ 5] 8
; K[14] AEAD_State[30] | Perm_row_2[ 6] 8
; K[15] AEAD_State[31] | Perm_row_2[ 7] 8
; 0x00 AEAD_State[32] | Perm_row_2[ 8] 8
; 0x00 AEAD_State[33] | Perm_row_2[ 9] 8
; 0x00 AEAD_State[34] | Perm_row_2[10] 8
; 0x00 AEAD_State[35] | Perm_row_2[11] <<< 8
; 0x00 AEAD_State[36] | Perm_row_3[ 0] 55
; 0x00 AEAD_State[37] | Perm_row_3[ 1] 55
; 0x00 AEAD_State[38] | Perm_row_3[ 2] 55
; 0x00 AEAD_State[39] | Perm_row_3[ 3] 55
; 0x00 AEAD_State[40] | Perm_row_3[ 4] 55
; 0x00 AEAD_State[41] | Perm_row_3[ 5] 55
; 0x00 AEAD_State[42] | Perm_row_3[ 6] 55
; 0x00 AEAD_State[43] | Perm_row_3[ 7] 55
; 0x00 AEAD_State[44] | Perm_row_3[ 8] 55
; 0x00 AEAD_State[45] | Perm_row_3[ 9] 55
; 0x00 AEAD_State[46] | Perm_row_3[10] 55
; 0x00 ^0x80 AEAD_State[47] | ^0x80 Perm_row_3[11] <<< 55
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
#define x10 r0
#define x11 r1
#define x12 r2
#define x13 r3
#define x14 r4
#define x15 r5
#define x16 r6
#define x17 r7
; an intentional arrangement of registers to facilitate movw
#define x20 r8
#define x21 r10
#define x22 r12
#define x23 r14
#define x24 r9
#define x25 r11
#define x26 r13
#define x27 r15
; an intentional arrangement of registers to facilitate movw
#define x30 r16
#define x35 r18
#define x32 r20
#define x37 r22
#define x34 r17
#define x31 r19
#define x36 r21
#define x33 r23
#define t0j r24
#define t1j r25
#define x0j r27
#include "assist.h"
.macro Sbox i0, i1, i2, i3
mov t0j, \i1
com \i0
and \i1, \i0
eor \i1, \i2
or \i2, t0j
eor \i0, \i3
eor \i2, \i0
eor t0j, \i3
and \i0, \i1
eor \i3, \i1
eor \i0, t0j
and t0j, \i2
eor \i1, t0j
.endm
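; Hedged C-style transcription of the macro above (comments only), writing
; a = \i0, b = \i1, c = \i2, d = \i3 and t for the temporary t0j:
;     t = b; a = ~a; b &= a; b ^= c; c |= t;
;     a ^= d; c ^= a; t ^= d; a &= b; d ^= b;
;     a ^= t; t &= c; b ^= t;
; i.e. the bit-sliced S-box evaluated on one byte of each row (eight columns)
; at a time.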
Permutation:
PUSH_CONFLICT
mov rcnt, rn
ldi YH, hi8(SRAM_STATE + ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + ROW_INBYTES)
ld x10, Y+
ld x11, Y+
ld x12, Y+
ld x13, Y+
ld x14, Y+
ld x15, Y+
ld x16, Y+
ld x17, Y+
ld x20, Y+
ld x21, Y+
ld x22, Y+
ld x23, Y+
ld x24, Y+
ld x25, Y+
ld x26, Y+
ld x27, Y+
ld x30, Y+
ld x31, Y+
ld x32, Y+
ld x33, Y+
ld x34, Y+
ld x35, Y+
ld x36, Y+
ld x37, Y+
#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[2] = 1 for HASH
rjmp For_Hash
For_AEAD:
ldi ZL, lo8(RC_LFSR6)
ldi ZH, hi8(RC_LFSR6)
rjmp round_loop_start
For_Hash:
ldi ZL, lo8(RC_LFSR7)
ldi ZH, hi8(RC_LFSR7)
#elif defined(CRYPTO_AEAD)
ldi ZL, lo8(RC_LFSR6)
ldi ZH, hi8(RC_LFSR6)
#else
ldi ZL, lo8(RC_LFSR7)
ldi ZH, hi8(RC_LFSR7)
#endif
round_loop_start:
; AddRC
lpm t0j, Z+
ldi YH, hi8(SRAM_STATE)
ldi YL, lo8(SRAM_STATE)
ld x0j, Y
eor x0j, t0j
; SubColumns
Sbox x0j, x10, x20, x30
st Y+, x0j
ld x0j, Y
Sbox x0j, x11, x21, x31
st Y+, x0j
ld x0j, Y
Sbox x0j, x12, x22, x32
st Y+, x0j
ld x0j, Y
Sbox x0j, x13, x23, x33
st Y+, x0j
ld x0j, Y
Sbox x0j, x14, x24, x34
st Y+, x0j
ld x0j, Y
Sbox x0j, x15, x25, x35
st Y+, x0j
ld x0j, Y
Sbox x0j, x16, x26, x36
st Y+, x0j
ld x0j, Y
Sbox x0j, x17, x27, x37
st Y, x0j
; ShiftRows
; <<< 1
mov t0j, x17
rol t0j
rol x10
rol x11
rol x12
rol x13
rol x14
rol x15
rol x16
rol x17
; <<< 8
; 7 6 5 4 3 2 1 0 => 6 5 4 3 2 1 0 7
;mov t0j, x27
;mov x27, x26
;mov x26, x25
;mov x25, x24
;mov x24, x23
;mov x23, x22
;mov x22, x21
;mov x21, x20
;mov x20, t0j
; an intentional arrangement of registers to facilitate movw
movw t0j, x23 ; t1j:t0j <= x27:x23
movw x23, x22 ; x27:x23 <= x26:x22
movw x22, x21 ; x26:x22 <= x25:x21
movw x21, x20 ; x25:x21 <= x24:x20
mov x20, t1j ; x20 <= t1j
mov x24, t0j ; x24 <= t0j
; <<< 1
mov t0j, x37
rol t0j
rol x30
rol x31
rol x32
rol x33
rol x34
rol x35
rol x36
rol x37
; <<< 24
; 7 6 5 4 3 2 1 0 => 4 3 2 1 0 7 6 5
;mov t0j, x30
;mov x30, x35
;mov x35, x32
;mov x32, x37
;mov x37, x34
;mov x34, x31
;mov x31, x36
;mov x36, x33
;mov x33, t0j
; an intentional arrangement of registers to facilitate movw
;x30 r16
;x35 r18
;x32 r20
;x37 r22
;x34 r17
;x31 r19
;x36 r21
;x33 r23
movw t0j, x30 ; t1j:t0j <= x34:x30
movw x30, x35 ; x34:x30 <= x31:x35
movw x35, x32 ; x31:x35 <= x36:x32
movw x32, x37 ; x36:x32 <= x33:x37
mov x37, t1j ; x37 <= x34
mov x33, t0j ; x33 <= x30
dec rcnt
breq round_loop_end
jmp round_loop_start
round_loop_end:
ldi YH, hi8(SRAM_STATE + ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + ROW_INBYTES)
st Y+, x10
st Y+, x11
st Y+, x12
st Y+, x13
st Y+, x14
st Y+, x15
st Y+, x16
st Y+, x17
st Y+, x20
st Y+, x21
st Y+, x22
st Y+, x23
st Y+, x24
st Y+, x25
st Y+, x26
st Y+, x27
st Y+, x30
st Y+, x31
st Y+, x32
st Y+, x33
st Y+, x34
st Y+, x35
st Y+, x36
st Y+, x37
POP_CONFLICT
ret
.section .text
#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
RC_LFSR6:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x21, 0x03, 0x06
.byte 0x0c, 0x18, 0x31, 0x22, 0x05, 0x0a, 0x14, 0x29
.byte 0x13, 0x27, 0x0f, 0x1e, 0x3d, 0x3a, 0x34, 0x28
.byte 0x11, 0x23, 0x07, 0x0e, 0x1c, 0x39, 0x32, 0x24
.byte 0x09, 0x12, 0x25, 0x0b, 0x16, 0x2d, 0x1b, 0x37
.byte 0x2e, 0x1d, 0x3b, 0x36, 0x2c, 0x19, 0x33, 0x26
.byte 0x0d, 0x1a, 0x35, 0x2a, 0x15, 0x2b, 0x17, 0x2f
.byte 0x1f, 0x3f, 0x3e, 0x3c, 0x38, 0x30, 0x20, 0x00
RC_LFSR7:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03
.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a
.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c
.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b
.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a
.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f
.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43
.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09
.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36
.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37
.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31
.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25
.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c
.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a
.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f
.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00
#elif defined(CRYPTO_AEAD)
RC_LFSR6:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x21, 0x03, 0x06
.byte 0x0c, 0x18, 0x31, 0x22, 0x05, 0x0a, 0x14, 0x29
.byte 0x13, 0x27, 0x0f, 0x1e, 0x3d, 0x3a, 0x34, 0x28
.byte 0x11, 0x23, 0x07, 0x0e, 0x1c, 0x39, 0x32, 0x24
.byte 0x09, 0x12, 0x25, 0x0b, 0x16, 0x2d, 0x1b, 0x37
.byte 0x2e, 0x1d, 0x3b, 0x36, 0x2c, 0x19, 0x33, 0x26
.byte 0x0d, 0x1a, 0x35, 0x2a, 0x15, 0x2b, 0x17, 0x2f
.byte 0x1f, 0x3f, 0x3e, 0x3c, 0x38, 0x30, 0x20, 0x00
#else
RC_LFSR7:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03
.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a
.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c
.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b
.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a
.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f
.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43
.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09
.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36
.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37
.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31
.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25
.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c
.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a
.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f
.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00
#endif
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
; an intentional arrangement of registers to facilitate movw
#define x20 r0
#define x21 r2
#define x22 r4
#define x23 r6
#define x24 r8
#define x25 r10
#define x26 r1
#define x27 r3
#define x28 r5
#define x29 r7
#define x2a r9
#define x2b r11
; an intentional arrangement of registers to facilitate movw
#define x30 r22
#define x35 r20
#define x3a r18
#define x33 r16
#define x38 r14
#define x31 r12
#define x36 r23
#define x3b r21
#define x34 r19
#define x39 r17
#define x32 r15
#define x37 r13
#define t0j r24
#define t1j r25
#define x0j r25
#define x1j r27
#include "assist.h"
.macro Sbox i0, i1, i2, i3
ldi t0j, 0xFF
eor \i0, t0j
mov t0j, \i1
and \i1, \i0
eor \i1, \i2
or \i2, t0j
eor \i0, \i3
eor \i2, \i0
eor t0j, \i3
and \i0, \i1
eor \i3, \i1
eor \i0, t0j
and t0j, \i2
eor \i1, t0j
.endm
.macro OneColumn i0, i1, i2, i3
ld \i0, Y
ldd \i1, Y + ROW_INBYTES
Sbox \i0, \i1, \i2, \i3
st Y+, \i0
rol \i1 ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES -1, \i1
.endm
Permutation:
PUSH_CONFLICT
mov rcnt, rn
ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES)
ld x20, Y+
ld x21, Y+
ld x22, Y+
ld x23, Y+
ld x24, Y+
ld x25, Y+
ld x26, Y+
ld x27, Y+
ld x28, Y+
ld x29, Y+
ld x2a, Y+
ld x2b, Y+
ld x30, Y+
ld x31, Y+
ld x32, Y+
ld x33, Y+
ld x34, Y+
ld x35, Y+
ld x36, Y+
ld x37, Y+
ld x38, Y+
ld x39, Y+
ld x3a, Y+
ld x3b, Y+
ldi ZL, lo8(RC_LFSR7)
ldi ZH, hi8(RC_LFSR7)
round_loop_start:
; AddRC
lpm t0j, Z+
ldi YH, hi8(SRAM_STATE)
ldi YL, lo8(SRAM_STATE)
ld x0j, Y
eor x0j, t0j
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, x20, x30
st Y+, x0j
lsl x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES -1, x1j
OneColumn x0j, x1j, x21, x31
OneColumn x0j, x1j, x22, x32
OneColumn x0j, x1j, x23, x33
OneColumn x0j, x1j, x24, x34
OneColumn x0j, x1j, x25, x35
OneColumn x0j, x1j, x26, x36
OneColumn x0j, x1j, x27, x37
OneColumn x0j, x1j, x28, x38
OneColumn x0j, x1j, x29, x39
OneColumn x0j, x1j, x2a, x3a
OneColumn x0j, x1j, x2b, x3b
ld x1j, Y
eor t0j, t0j
adc x1j, t0j
st Y, x1j
; b a 9 8 7 6 5 4 3 2 1 0
; -- -- -- -- -- -- -- -- -- -- -- x- 0
; -- -- -- -- -- -- -- -- -- -- -- x' 0
; -- -- -- -- -- -- -- -- -- -- x- -- 1
; -- -- -- -- x' -- -- -- -- -- -- -- 7
; 4 3 2 1 0 b a 9 8 7 6 5
; ShiftRows -- the last two rows
; <<< 8
; b a 9 8 7 6 5 4 3 2 1 0 => a 9 8 7 6 5 4 3 2 1 0 b
movw t0j, x25 ; t1j:t0j <= x2b:x25
movw x25, x24 ; x2b:x25 <= x2a:x24
movw x24, x23 ; x2a:x24 <= x29:x23
movw x23, x22 ; x29:x23 <= x28:x22
movw x22, x21 ; x28:x22 <= x27:x21
movw x21, x20 ; x27:x21 <= x26:x20
mov x26, t0j ; x26 <= x25
mov x20, t1j ; x20 <= x2b
; >>> 1
mov t0j, x3b
ror t0j
ror x3a
ror x39
ror x38
ror x37
ror x36
ror x35
ror x34
ror x33
ror x32
ror x31
ror x30
ror x3b
; <<< 56
; b a 9 8 7 6 5 4 3 2 1 0 => 4 3 2 1 0 b a 9 8 7 6 5
; mov x3j, x30
; mov x30, x35
; mov x35, x3a
; mov x3a, x33
; mov x33, x38
; mov x38, x31
; mov x31, x36
; mov x36, x3b
; mov x3b, x34
; mov x34, x39
; mov x39, x32
; mov x32, x37
; mov x37, x3j
; an intentional arrangement of registers to facilitate movw
; x30 r22
; x35 r20
; x3a r18
; x33 r16
; x38 r14
; x31 r12
; x36 r23
; x3b r21
; x34 r19
; x39 r17
; x32 r15
; x37 r13
movw t0j, x30 ; t1j:t0j <= x36:x30
movw x30, x35 ; x36:x30 <= x3b:x35
movw x35, x3a ; x3b:x35 <= x34:x3a
movw x3a, x33 ; x34:x3a <= x39:x33
movw x33, x38 ; x39:x33 <= x32:x38
movw x38, x31 ; x32:x38 <= x37:x31
mov x31, t1j ; x31 <= x36
mov x37, t0j ; x37 <= x30
dec rcnt
breq round_loop_end
jmp round_loop_start
round_loop_end:
ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES)
st Y+, x20
st Y+, x21
st Y+, x22
st Y+, x23
st Y+, x24
st Y+, x25
st Y+, x26
st Y+, x27
st Y+, x28
st Y+, x29
st Y+, x2a
st Y+, x2b
st Y+, x30
st Y+, x31
st Y+, x32
st Y+, x33
st Y+, x34
st Y+, x35
st Y+, x36
st Y+, x37
st Y+, x38
st Y+, x39
st Y+, x3a
st Y+, x3b
POP_CONFLICT
ret
RC_LFSR7:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03
.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a
.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c
.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b
.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a
.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f
.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43
.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09
.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36
.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37
.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31
.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25
.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c
.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a
.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f
.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
#define x20 r0
#define x22 r2
#define x24 r4
#define x26 r6
#define x28 r1
#define x2a r3
#define x2c r5
#define x2e r7
#define x30 r8
#define x3d r10
#define x3a r12
#define x37 r14
#define x34 r16
#define x31 r18
#define x3e r20
#define x3b r22
#define x38 r9
#define x35 r11
#define x32 r13
#define x3f r15
#define x3c r17
#define x39 r19
#define x36 r21
#define x33 r23
#define t0j r24
#define t1j r25
#define x0j r25
#define x1j r27
#define x2j r26
#include "assist.h"
.macro Sbox i0, i1, i2, i3
ldi t0j, 0xFF
eor \i0, t0j
mov t0j, \i1
and \i1, \i0
eor \i1, \i2
or \i2, t0j
eor \i0, \i3
eor \i2, \i0
eor t0j, \i3
and \i0, \i1
eor \i3, \i1
eor \i0, t0j
and t0j, \i2
eor \i1, t0j
.endm
.macro TwoColumns i2_e, i3_e, i3_o
; column 2i
ld x0j, Y
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, \i2_e, \i3_e
st Y+, x0j
rol x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES - 1, x1j
; column 2i+1
ld x0j, Y
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, x2j, \i3_o
st Y+, x0j
rol x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES - 1, x1j
ldd t0j, Y + 2 * ROW_INBYTES + 1
std Y + 2 * ROW_INBYTES + 1, x2j
mov x2j, t0j
.endm
Permutation:
PUSH_CONFLICT
mov rcnt, rn
push rcnt
ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES)
ldd x20, Y + 0x00
ldd x22, Y + 0x02
ldd x24, Y + 0x04
ldd x26, Y + 0x06
ldd x28, Y + 0x08
ldd x2a, Y + 0x0a
ldd x2c, Y + 0x0c
ldd x2e, Y + 0x0e
adiw YL, ROW_INBYTES
ld x30, Y+
ld x31, Y+
ld x32, Y+
ld x33, Y+
ld x34, Y+
ld x35, Y+
ld x36, Y+
ld x37, Y+
ld x38, Y+
ld x39, Y+
ld x3a, Y+
ld x3b, Y+
ld x3c, Y+
ld x3d, Y+
ld x3e, Y+
ld x3f, Y+
#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[2] = 1 for HASH
rjmp For_Hash
For_AEAD:
ldi ZL, lo8(RC_LFSR7)
ldi ZH, hi8(RC_LFSR7)
rjmp round_loop_start
For_Hash:
ldi ZL, lo8(RC_LFSR8)
ldi ZH, hi8(RC_LFSR8)
#elif defined(CRYPTO_AEAD)
ldi ZL, lo8(RC_LFSR7)
ldi ZH, hi8(RC_LFSR7)
#else
ldi ZL, lo8(RC_LFSR8)
ldi ZH, hi8(RC_LFSR8)
#endif
round_loop_start:
; AddRC
lpm t0j, Z+
ldi YH, hi8(SRAM_STATE)
ldi YL, lo8(SRAM_STATE)
; column 0
ld x0j, Y
eor x0j, t0j
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, x20, x30
st Y+, x0j
lsl x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES - 1, x1j
; column 1
ld x0j, Y
ldd x1j, Y + ROW_INBYTES
ldd x2j, Y + 2 * ROW_INBYTES
Sbox x0j, x1j, x2j, x31
st Y+, x0j
rol x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES - 1, x1j
ldd t0j, Y + 2 * ROW_INBYTES + 1
std Y + 2 * ROW_INBYTES + 1, x2j
mov x2j, t0j
; column 2, 3
TwoColumns x22, x32, x33
; column 4, 5
TwoColumns x24, x34, x35
; column 6, 7
TwoColumns x26, x36, x37
; column 8, 9
TwoColumns x28, x38, x39
; column 10, 11
TwoColumns x2a, x3a, x3b
; column 12, 13
TwoColumns x2c, x3c, x3d
; column 14
ld x0j, Y
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, x2e, x3e
st Y+, x0j
rol x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES - 1, x1j
; column 15
ld x0j, Y
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, x2j, x3f
st Y+, x0j
rol x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES - 1, x1j
ld x1j, Y
eor t0j, t0j
adc x1j, t0j
st Y, x1j
std Y + ROW_INBYTES + 1, x2j
; f e d c b a 9 8 7 6 5 4 3 2 1 0
; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x- 0
; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x' 0
; -- -- -- -- -- -- -- -- -- -- -- -- -- x- -- -- 2
; -- -- -- -- -- -- -- -- -- -- -- -- x' -- -- -- 3
; c b a 9 8 7 6 5 4 3 2 1 0 f e d
; x2e x2c x2a x28 x26 x24 x22 x20 => x2c x2a x28 x26 x24 x22 x20 x2e
;mov t0j, x2e
;mov x2e, x2c
;mov x2c, x2a
;mov x2a, x28
;mov x28, x26
;mov x26, x24
;mov x24, x22
;mov x22, x20
;mov x20, t0j
; an intentional arrangement of registers to facilitate movw
movw t0j, x26 ; t1j:t0j <= x2e:x26
movw x26, x24 ; x2e:x26 <= x2c:x24
movw x24, x22 ; x2c:x24 <= x2a:x22
movw x22, x20 ; x2a:x22 <= x28:x20
mov x20, t1j ; x20 <= t1j
mov x28, t0j ; x28 <= t0j
; <<< 1
mov t0j, x3f
rol t0j
rol x30
rol x31
rol x32
rol x33
rol x34
rol x35
rol x36
rol x37
rol x38
rol x39
rol x3a
rol x3b
rol x3c
rol x3d
rol x3e
rol x3f
; <<< 24
; f e d c b a 9 8 7 6 5 4 3 2 1 0 =>
; c b a 9 8 7 6 5 4 3 2 1 0 f e d
; mov x3j, x30
; mov x30, x3d
; mov x3d, x3a
; mov x3a, x37
; mov x37, x34
; mov x34, x31
; mov x31, x3e
; mov x3e, x3b
; mov x3b, x38
; mov x38, x35
; mov x35, x32
; mov x32, x3f
; mov x3f, x3c
; mov x3c, x39
; mov x39, x36
; mov x36, x33
; mov x33, x3j
; an intentional arrangement of registers to facilitate movw
; x30 r8
; x3d r10
; x3a r12
; x37 r14
; x34 r16
; x31 r18
; x3e r20
; x3b r22
; x38 r9
; x35 r11
; x32 r13
; x3f r15
; x3c r17
; x39 r19
; x36 r21
; x33 r23
movw t0j, x30 ; t1j:t0j <= x38:x30
movw x30, x3d ; x38:x30 <= x35:x3d
movw x3d, x3a ; x35:x3d <= x32:x3a
movw x3a, x37 ; x32:x3a <= x3f:x37
movw x37, x34 ; x3f:x37 <= x3c:x34
movw x34, x31 ; x3c:x34 <= x39:x31
movw x31, x3e ; x39:x31 <= x36:x3e
movw x3e, x3b ; x36:x3e <= x33:x3b
mov x3b, t1j ; x3b <= x38
mov x33, t0j ; x33 <= x30
pop rcnt
dec rcnt
push rcnt
breq round_loop_end
rjmp round_loop_start
round_loop_end:
pop rcnt
ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES)
std Y + 0x00, x20
std Y + 0x02, x22
std Y + 0x04, x24
std Y + 0x06, x26
std Y + 0x08, x28
std Y + 0x0a, x2a
std Y + 0x0c, x2c
std Y + 0x0e, x2e
adiw YL, ROW_INBYTES
st Y+, x30
st Y+, x31
st Y+, x32
st Y+, x33
st Y+, x34
st Y+, x35
st Y+, x36
st Y+, x37
st Y+, x38
st Y+, x39
st Y+, x3a
st Y+, x3b
st Y+, x3c
st Y+, x3d
st Y+, x3e
st Y+, x3f
POP_CONFLICT
ret
.section .text
#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
RC_LFSR7:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03
.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a
.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c
.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b
.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a
.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f
.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43
.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09
.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36
.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37
.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31
.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25
.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c
.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a
.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f
.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00
RC_LFSR8:
.byte 0x01, 0x02, 0x04, 0x08, 0x11, 0x23, 0x47, 0x8e
.byte 0x1c, 0x38, 0x71, 0xe2, 0xc4, 0x89, 0x12, 0x25
.byte 0x4b, 0x97, 0x2e, 0x5c, 0xb8, 0x70, 0xe0, 0xc0
.byte 0x81, 0x03, 0x06, 0x0c, 0x19, 0x32, 0x64, 0xc9
.byte 0x92, 0x24, 0x49, 0x93, 0x26, 0x4d, 0x9b, 0x37
.byte 0x6e, 0xdc, 0xb9, 0x72, 0xe4, 0xc8, 0x90, 0x20
.byte 0x41, 0x82, 0x05, 0x0a, 0x15, 0x2b, 0x56, 0xad
.byte 0x5b, 0xb6, 0x6d, 0xda, 0xb5, 0x6b, 0xd6, 0xac
.byte 0x59, 0xb2, 0x65, 0xcb, 0x96, 0x2c, 0x58, 0xb0
.byte 0x61, 0xc3, 0x87, 0x0f, 0x1f, 0x3e, 0x7d, 0xfb
.byte 0xf6, 0xed, 0xdb, 0xb7, 0x6f, 0xde, 0xbd, 0x7a
.byte 0xf5, 0xeb, 0xd7, 0xae, 0x5d, 0xba, 0x74, 0xe8
.byte 0xd1, 0xa2, 0x44, 0x88, 0x10, 0x21, 0x43, 0x86
.byte 0x0d, 0x1b, 0x36, 0x6c, 0xd8, 0xb1, 0x63, 0xc7
.byte 0x8f, 0x1e, 0x3c, 0x79, 0xf3, 0xe7, 0xce, 0x9c
.byte 0x39, 0x73, 0xe6, 0xcc, 0x98, 0x31, 0x62, 0xc5
.byte 0x8b, 0x16, 0x2d, 0x5a, 0xb4, 0x69, 0xd2, 0xa4
.byte 0x48, 0x91, 0x22, 0x45, 0x8a, 0x14, 0x29, 0x52
.byte 0xa5, 0x4a, 0x95, 0x2a, 0x54, 0xa9, 0x53, 0xa7
.byte 0x4e, 0x9d, 0x3b, 0x77, 0xee, 0xdd, 0xbb, 0x76
.byte 0xec, 0xd9, 0xb3, 0x67, 0xcf, 0x9e, 0x3d, 0x7b
.byte 0xf7, 0xef, 0xdf, 0xbf, 0x7e, 0xfd, 0xfa, 0xf4
.byte 0xe9, 0xd3, 0xa6, 0x4c, 0x99, 0x33, 0x66, 0xcd
.byte 0x9a, 0x35, 0x6a, 0xd4, 0xa8, 0x51, 0xa3, 0x46
.byte 0x8c, 0x18, 0x30, 0x60, 0xc1, 0x83, 0x07, 0x0e
.byte 0x1d, 0x3a, 0x75, 0xea, 0xd5, 0xaa, 0x55, 0xab
.byte 0x57, 0xaf, 0x5f, 0xbe, 0x7c, 0xf9, 0xf2, 0xe5
.byte 0xca, 0x94, 0x28, 0x50, 0xa1, 0x42, 0x84, 0x09
.byte 0x13, 0x27, 0x4f, 0x9f, 0x3f, 0x7f, 0xff, 0xfe
.byte 0xfc, 0xf8, 0xf0, 0xe1, 0xc2, 0x85, 0x0b, 0x17
.byte 0x2f, 0x5e, 0xbc, 0x78, 0xf1, 0xe3, 0xc6, 0x8d
.byte 0x1a, 0x34, 0x68, 0xd0, 0xa0, 0x40, 0x80, 0x00
#elif defined(CRYPTO_AEAD)
RC_LFSR7:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03
.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a
.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c
.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b
.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a
.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f
.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43
.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09
.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36
.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37
.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31
.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25
.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c
.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a
.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f
.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00
#else
RC_LFSR8:
.byte 0x01, 0x02, 0x04, 0x08, 0x11, 0x23, 0x47, 0x8e
.byte 0x1c, 0x38, 0x71, 0xe2, 0xc4, 0x89, 0x12, 0x25
.byte 0x4b, 0x97, 0x2e, 0x5c, 0xb8, 0x70, 0xe0, 0xc0
.byte 0x81, 0x03, 0x06, 0x0c, 0x19, 0x32, 0x64, 0xc9
.byte 0x92, 0x24, 0x49, 0x93, 0x26, 0x4d, 0x9b, 0x37
.byte 0x6e, 0xdc, 0xb9, 0x72, 0xe4, 0xc8, 0x90, 0x20
.byte 0x41, 0x82, 0x05, 0x0a, 0x15, 0x2b, 0x56, 0xad
.byte 0x5b, 0xb6, 0x6d, 0xda, 0xb5, 0x6b, 0xd6, 0xac
.byte 0x59, 0xb2, 0x65, 0xcb, 0x96, 0x2c, 0x58, 0xb0
.byte 0x61, 0xc3, 0x87, 0x0f, 0x1f, 0x3e, 0x7d, 0xfb
.byte 0xf6, 0xed, 0xdb, 0xb7, 0x6f, 0xde, 0xbd, 0x7a
.byte 0xf5, 0xeb, 0xd7, 0xae, 0x5d, 0xba, 0x74, 0xe8
.byte 0xd1, 0xa2, 0x44, 0x88, 0x10, 0x21, 0x43, 0x86
.byte 0x0d, 0x1b, 0x36, 0x6c, 0xd8, 0xb1, 0x63, 0xc7
.byte 0x8f, 0x1e, 0x3c, 0x79, 0xf3, 0xe7, 0xce, 0x9c
.byte 0x39, 0x73, 0xe6, 0xcc, 0x98, 0x31, 0x62, 0xc5
.byte 0x8b, 0x16, 0x2d, 0x5a, 0xb4, 0x69, 0xd2, 0xa4
.byte 0x48, 0x91, 0x22, 0x45, 0x8a, 0x14, 0x29, 0x52
.byte 0xa5, 0x4a, 0x95, 0x2a, 0x54, 0xa9, 0x53, 0xa7
.byte 0x4e, 0x9d, 0x3b, 0x77, 0xee, 0xdd, 0xbb, 0x76
.byte 0xec, 0xd9, 0xb3, 0x67, 0xcf, 0x9e, 0x3d, 0x7b
.byte 0xf7, 0xef, 0xdf, 0xbf, 0x7e, 0xfd, 0xfa, 0xf4
.byte 0xe9, 0xd3, 0xa6, 0x4c, 0x99, 0x33, 0x66, 0xcd
.byte 0x9a, 0x35, 0x6a, 0xd4, 0xa8, 0x51, 0xa3, 0x46
.byte 0x8c, 0x18, 0x30, 0x60, 0xc1, 0x83, 0x07, 0x0e
.byte 0x1d, 0x3a, 0x75, 0xea, 0xd5, 0xaa, 0x55, 0xab
.byte 0x57, 0xaf, 0x5f, 0xbe, 0x7c, 0xf9, 0xf2, 0xe5
.byte 0xca, 0x94, 0x28, 0x50, 0xa1, 0x42, 0x84, 0x09
.byte 0x13, 0x27, 0x4f, 0x9f, 0x3f, 0x7f, 0xff, 0xfe
.byte 0xfc, 0xf8, 0xf0, 0xe1, 0xc2, 0x85, 0x0b, 0x17
.byte 0x2f, 0x5e, 0xbc, 0x78, 0xf1, 0xe3, 0xc6, 0x8d
.byte 0x1a, 0x34, 0x68, 0xd0, 0xa0, 0x40, 0x80, 0x00
#endif
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
;
; ============================================
; R E G I S T E R D E F I N I T I O N S
; ============================================
;
#define mclen r16
#define radlen r17
#define tcnt r17
#define tmp0 r20
#define tmp1 r21
#define cnt0 r22
#define rn r23
#define rate r24
; AEDH = 0b000: for authenticating AD
; AEDH = 0b001: for encryption
; AEDH = 0b011: for decryption
; AEDH = 0b100: for hash
#define AEDH r25
#define rcnt r26
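;
; Reading aid (comment-only sketch): the AEAD/hash routines that include this
; file select behaviour by testing single AEDH bits with sbrc, e.g.
;     sbrc AEDH, 0   ; AEDH[0] = 1 -> enc/dec: write the output byte
;     sbrc AEDH, 1   ; AEDH[1] = 1 -> dec: ciphertext replaces the state byte
;     sbrc AEDH, 2   ; AEDH[2] = 1 -> hash: select the hash round constants
; These bit meanings only restate the AEDH values listed above.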
#if (STATE_INBITS==256)
#include "knot256.h"
#elif (STATE_INBITS==384)
#include "knot384.h"
#elif (STATE_INBITS==512)
#include "knot512.h"
#else
#error "Not specified state size"
#endif
#define CRYPTO_KEYBYTES 32
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 32
#define CRYPTO_ABYTES 32
#define CRYPTO_NOOVERLAP 1
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
.macro PUSH_CONFLICT
push r16
push r17
push r18
push r19
push r23
push r24
push r25
push r26
push r27
push r28
push r29
push r30
push r31
.endm
.macro POP_CONFLICT
pop r31
pop r30
pop r29
pop r28
pop r27
pop r26
pop r25
pop r24
pop r23
pop r19
pop r18
pop r17
pop r16
.endm
.macro PUSH_ALL
push r2
push r3
push r4
push r5
push r6
push r7
push r8
push r9
push r10
push r11
push r12
push r13
push r14
push r15
push r16
push r17
push r28
push r29
.endm
.macro POP_ALL
pop r29
pop r28
pop r17
pop r16
pop r15
pop r14
pop r13
pop r12
pop r11
pop r10
pop r9
pop r8
pop r7
pop r6
pop r5
pop r4
pop r3
pop r2
clr r1
.endm
\ No newline at end of file
#ifndef __CONFIG_H__
#define __CONFIG_H__
#define CRYPTO_AEAD
//#define CRYPTO_HASH
#define MAX_MESSAGE_LENGTH 128
#define STATE_INBITS 512
/* For CRYPTO_AEAD */
#define CRYPTO_KEYBITS 256
/* For CRYPTO_HASH */
#define CRYPTO_BITS 512
#define STATE_INBYTES ((STATE_INBITS + 7) / 8)
#define ROW_INBITS ((STATE_INBITS + 3) / 4)
#define ROW_INBYTES ((ROW_INBITS + 7) / 8)
/* For CRYPTO_AEAD */
#define CRYPTO_KEYBYTES ((CRYPTO_KEYBITS + 7) / 8)
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES CRYPTO_KEYBYTES
#define CRYPTO_ABYTES CRYPTO_KEYBYTES
#define CRYPTO_NOOVERLAP 1
#define MAX_ASSOCIATED_DATA_LENGTH 32
#define MAX_CIPHER_LENGTH (MAX_MESSAGE_LENGTH + CRYPTO_ABYTES)
#define TAG_MATCH 0
#define TAG_UNMATCH -1
#define OTHER_FAILURES -2
/* For CRYPTO_HASH */
#define CRYPTO_BYTES ((CRYPTO_BITS + 7) / 8)
#define DOMAIN_BITS 0x80
#define PAD_BITS 0x01
#define S384_R192_BITS 0x80
#if (STATE_INBITS==256)
#define C1 1
#define C2 8
#define C3 25
#elif (STATE_INBITS==384)
#define C1 1
#define C2 8
#define C3 55
#elif (STATE_INBITS==512)
#define C1 1
#define C2 16
#define C3 25
#else
#error "Not specified state size"
#endif
#ifdef CRYPTO_AEAD
/* For CRYPTO_AEAD */
#define KEY_INBITS (CRYPTO_KEYBYTES * 8)
#define KEY_INBYTES (CRYPTO_KEYBYTES)
#define NONCE_INBITS (CRYPTO_NPUBBYTES * 8)
#define NONCE_INBYTES (CRYPTO_NPUBBYTES)
#define TAG_INBITS (CRYPTO_ABYTES * 8)
#define TAG_INBYTES (CRYPTO_ABYTES)
#if (KEY_INBITS==128) && (STATE_INBITS==256)
#define RATE_INBITS 64
#define NR_0 52
#define NR_i 28
#define NR_f 32
#elif (KEY_INBITS==128) && (STATE_INBITS==384)
#define RATE_INBITS 192
#define NR_0 76
#define NR_i 28
#define NR_f 32
#elif (KEY_INBITS==192) && (STATE_INBITS==384)
#define RATE_INBITS 96
#define NR_0 76
#define NR_i 40
#define NR_f 44
#elif (KEY_INBITS==256) && (STATE_INBITS==512)
#define RATE_INBITS 128
#define NR_0 100
#define NR_i 52
#define NR_f 56
#else
#error "Not specified key size and state size"
#endif
#define RATE_INBYTES ((RATE_INBITS + 7) / 8)
#define SQUEEZE_RATE_INBYTES TAG_INBYTES
#endif
#ifdef CRYPTO_HASH
/* For CRYPTO_HASH */
#define HASH_DIGEST_INBITS (CRYPTO_BYTES * 8)
#if (HASH_DIGEST_INBITS==256) && (STATE_INBITS==256)
#define HASH_RATE_INBITS 32
#define HASH_SQUEEZE_RATE_INBITS 128
#define NR_h 68
#elif (HASH_DIGEST_INBITS==256) && (STATE_INBITS==384)
#define HASH_RATE_INBITS 128
#define HASH_SQUEEZE_RATE_INBITS 128
#define NR_h 80
#elif (HASH_DIGEST_INBITS==384) && (STATE_INBITS==384)
#define HASH_RATE_INBITS 48
#define HASH_SQUEEZE_RATE_INBITS 192
#define NR_h 104
#elif (HASH_DIGEST_INBITS==512) && (STATE_INBITS==512)
#define HASH_RATE_INBITS 64
#define HASH_SQUEEZE_RATE_INBITS 256
#define NR_h 140
#else
#error "Not specified hash digest size and state size"
#endif
#define HASH_RATE_INBYTES ((HASH_RATE_INBITS + 7) / 8)
#define HASH_SQUEEZE_RATE_INBYTES ((HASH_SQUEEZE_RATE_INBITS + 7) / 8)
#endif
#define TAG_MATCH 0
#define TAG_UNMATCH -1
#define OTHER_FAILURES -2
#endif
\ No newline at end of file
#ifdef __cplusplus
extern "C" {
#endif
int crypto_aead_encrypt(
unsigned char *c,unsigned long long *clen,
const unsigned char *m,unsigned long long mlen,
const unsigned char *ad,unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
);
int crypto_aead_decrypt(
unsigned char *m,unsigned long long *outputmlen,
unsigned char *nsec,
const unsigned char *c,unsigned long long clen,
const unsigned char *ad,unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
);
#ifdef __cplusplus
}
#endif
#include <avr/io.h>
#include <avr/sfr_defs.h>
#include <stdlib.h>
#include <string.h>
#include "config.h"
extern void crypto_aead_encrypt_asm(
unsigned char *c,
const unsigned char *m,
unsigned char mlen,
const unsigned char *ad,
unsigned char adlen,
const unsigned char *npub,
const unsigned char *k
);
extern int crypto_aead_decrypt_asm(
unsigned char *m,
const unsigned char *c,
unsigned char clen,
const unsigned char *ad,
unsigned char adlen,
const unsigned char *npub,
const unsigned char *k
);
extern void crypto_hash_asm(
unsigned char *out,
const unsigned char *in,
unsigned char inlen
);
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
)
{
/*
...
... the code for the cipher implementation goes here,
... generating a ciphertext c[0],c[1],...,c[*clen-1]
... from a plaintext m[0],m[1],...,m[mlen-1]
... and associated data ad[0],ad[1],...,ad[adlen-1]
... and nonce npub[0],npub[1],..
... and secret key k[0],k[1],...
... the implementation shall not use nsec
...
... return 0;
*/
(void)nsec;
crypto_aead_encrypt_asm(c, m, mlen, ad, adlen, npub, k);
*clen = mlen + TAG_INBYTES;
return 0;
}
int crypto_aead_decrypt(
unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
)
{
/*
...
... the code for the AEAD implementation goes here,
... generating a plaintext m[0],m[1],...,m[*mlen-1]
... and secret message number nsec[0],nsec[1],...
... from a ciphertext c[0],c[1],...,c[clen-1]
... and associated data ad[0],ad[1],...,ad[adlen-1]
... and nonce number npub[0],npub[1],...
... and secret key k[0],k[1],...
...
... return 0;
*/
unsigned long long mlen_;
unsigned char tag_is_match;
(void)nsec;
if (clen < CRYPTO_ABYTES) {
return -1;
}
mlen_ = clen - CRYPTO_ABYTES;
tag_is_match = crypto_aead_decrypt_asm(m, c, mlen_, ad, adlen, npub, k);
if (tag_is_match != 0)
{
memset(m, 0, (size_t)mlen_);
return -1;
}
*mlen = mlen_;
return 0;
}
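/*
 * Usage sketch (illustrative only; buffer sizes follow the config.h limits):
 *
 *     unsigned char k[CRYPTO_KEYBYTES], npub[CRYPTO_NPUBBYTES];
 *     unsigned char ad[MAX_ASSOCIATED_DATA_LENGTH], m[MAX_MESSAGE_LENGTH];
 *     unsigned char c[MAX_CIPHER_LENGTH], m2[MAX_MESSAGE_LENGTH];
 *     unsigned long long clen, m2len;
 *
 *     crypto_aead_encrypt(c, &clen, m, sizeof m, ad, sizeof ad, NULL, npub, k);
 *     if (crypto_aead_decrypt(m2, &m2len, NULL, c, clen, ad, sizeof ad, npub, k) != 0) {
 *         // tag mismatch: m2 has been zeroed, reject the ciphertext
 *     }
 */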
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
;
; ============================================
; S R A M D E F I N I T I O N S
; ============================================
;
#include <avr/io.h>
#include "config.h"
.section .noinit
SRAM_STATE: .BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
#if (STATE_INBYTES > 32)
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
#endif
#if (STATE_INBYTES > 48)
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
#endif
SRAM_MESSAGE_OUT_ADDR: .BYTE 0, 0
SRAM_MESSAGE_IN_ADDR: .BYTE 0, 0
SRAM_MESSAGE_IN_LEN: .BYTE 0, 0
#ifdef CRYPTO_AEAD
; For CRYPTO_AEAD
SRAM_ASSOCIATED_DATA_ADDR: .BYTE 0, 0
SRAM_ADLEN: .BYTE 0, 0
SRAM_NONCE_ADDR: .BYTE 0, 0
SRAM_KEY_ADDR: .BYTE 0, 0
SRAM_ADDITIONAL:
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
#if (CRYPTO_ABYTES > 16)
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
#endif
#if (CRYPTO_ABYTES > 24)
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
#endif
#endif
.section .text
#include "permutation.h"
; require YH:YL be the address of the current associated data/cipher/message block
; for enc and dec, store ciphertext or plaintext
; require ZH:ZL be the address of the current cipher/message block
.macro XOR_to_State_ENCDEC
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
mov cnt0, rate
XOR_to_State_loop_ENCDEC:
ld tmp0, Y+ ; plaintext/ciphertext
ld tmp1, X ; state
eor tmp1, tmp0 ; ciphertext/plaintext
st Z+, tmp1 ; store ciphertext/plaintext
sbrc AEDH, 1 ; test auth/enc or dec, if AEDH[1] == 0, skip replacing the state byte
mov tmp1, tmp0 ; if dec, replace state
st X+, tmp1 ; store state byte
dec cnt0
brne XOR_to_State_loop_ENCDEC
; YH:YL are now the address of the next associated data block
.endm
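; Hedged C view of the macro above (comments only): per byte of the rate,
;     out[i]   = state[i] ^ in[i];                 /* ciphertext on enc, plaintext on dec */
;     state[i] = (AEDH & 0x02) ? in[i] : out[i];   /* dec keeps the ciphertext in the state */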
; require YH:YL be the address of the current associated data block
; the block is only absorbed (XORed) into the state; no output is written
.macro XOR_to_State_AUTH
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
mov cnt0, rate
XOR_to_State_loop_AUTH:
ld tmp0, Y+ ; plaintext/ciphertext
ld tmp1, X ; state
eor tmp1, tmp0 ; ciphertext/plaintext
st X+, tmp1 ; store state byte
dec cnt0
brne XOR_to_State_loop_AUTH
; YH:YL are now the address of the next associated data block
.endm
; require YH:YL point to the input data
; require ZH:ZL point to the output data
; require cnt0 contains the number of bytes in the source data
; require the number of bytes in the source data to be less than rate, i.e., 0 <= cnt0 < rate
;
; the 0th bit in AEDH is used to distinguish (auth AD) from (enc/dec M/C):
; AEDH[0] = 0 for (auth AD), AEDH[0] = 1 for (enc/dec M/C)
; the 1st bit in AEDH is used to distinguish (auth AD/enc M) from (dec C):
; AEDH[1] = 0 for (auth AD/enc M), AEDH[1] = 1 for (dec C)
; AEDH = 0b000 for (auth AD)
; AEDH = 0b001 for (enc M)
; AEDH = 0b011 for (dec C)
Pad_XOR_to_State:
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
tst cnt0
breq XOR_padded_data
XOR_source_data_loop:
ld tmp0, Y+ ; plaintext/ciphertext
ld tmp1, X ; state
eor tmp1, tmp0 ; ciphertext/plaintext
sbrc AEDH, 0 ; test auth or enc/dec, if AEDH[0] == 0, skip store result
st Z+, tmp1 ; store ciphertext/plaintext
sbrc AEDH, 1 ; test auth/enc or dec, if AEDH[1] == 0, skip replacing the state byte
mov tmp1, tmp0 ; if dec, replace state
st X+, tmp1 ; store state byte
dec cnt0
brne XOR_source_data_loop
XOR_padded_data:
ldi tmp0, PAD_BITS
ld tmp1, X
eor tmp1, tmp0
st X, tmp1
ret
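; Hedged C view of Pad_XOR_to_State (comments only): the cnt0 remaining bytes
; are handled as in the macro above, except the ciphertext/plaintext byte is
; written only when AEDH[0] = 1; then a single padding bit is added:
;     state[cnt0] ^= PAD_BITS;   /* PAD_BITS == 0x01 */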
AddDomain:
ldi XH, hi8(SRAM_STATE + STATE_INBYTES - 1)
ldi XL, lo8(SRAM_STATE + STATE_INBYTES - 1)
ldi tmp0, DOMAIN_BITS
ld tmp1, X
eor tmp0, tmp1
st X, tmp0
ret
; require ZH:ZL be the address of the destination
EXTRACT_from_State:
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
mov tmp1, rate
EXTRACT_from_State_loop:
ld tmp0, X+
st Z+, tmp0
dec tmp1
brne EXTRACT_from_State_loop
ret
AUTH:
tst radlen
breq AUTH_end
cp radlen, rate
brlo auth_ad_padded_block
auth_ad_loop:
XOR_to_State_AUTH
rcall Permutation
sub radlen, rate
cp radlen, rate
brlo auth_ad_padded_block
rjmp auth_ad_loop
auth_ad_padded_block:
mov cnt0, radlen
rcall Pad_XOR_to_State
rcall Permutation
AUTH_end:
ret
#ifdef CRYPTO_AEAD
Initialization:
ldi rn, NR_0
ldi XL, lo8(SRAM_STATE)
ldi XH, hi8(SRAM_STATE)
lds YH, SRAM_NONCE_ADDR
lds YL, SRAM_NONCE_ADDR + 1
ldi cnt0, CRYPTO_NPUBBYTES
load_nonce_loop:
ld tmp0, Y+
st X+, tmp0
dec cnt0
brne load_nonce_loop
lds YH, SRAM_KEY_ADDR
lds YL, SRAM_KEY_ADDR + 1
ldi cnt0, CRYPTO_KEYBYTES
load_key_loop:
ld tmp0, Y+
st X+, tmp0
dec cnt0
brne load_key_loop
#if (STATE_INBITS==384) && (RATE_INBITS==192)
ldi cnt0, (STATE_INBYTES - CRYPTO_NPUBBYTES - CRYPTO_KEYBYTES - 1)
clr tmp0
empty_state_loop:
st X+, tmp0
dec cnt0
brne empty_state_loop
ldi tmp0, S384_R192_BITS
st X+, tmp0
#endif
rcall Permutation
ret
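; Hedged sketch of the initial state built above (comments only):
;     state[0 .. NONCE_INBYTES-1]                         = npub
;     state[NONCE_INBYTES .. NONCE_INBYTES+KEY_INBYTES-1] = k
;     (384-bit state with 192-bit rate only: the remaining bytes are zeroed
;      and the last state byte is set to S384_R192_BITS = 0x80)
; followed by one Permutation call with rn = NR_0 rounds.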
ENC:
tst mclen
breq ENC_end
cp mclen, rate
brlo enc_padded_block
enc_loop:
XOR_to_State_ENCDEC
ldi rn, NR_i
rcall Permutation
sub mclen, rate
cp mclen, rate
brlo enc_padded_block
rjmp enc_loop
enc_padded_block:
mov cnt0, mclen
rcall Pad_XOR_to_State
ENC_end:
ret
Finalization:
ldi rate, SQUEEZE_RATE_INBYTES
ldi rn, NR_f
rcall Permutation
rcall EXTRACT_from_State
ret
; void crypto_aead_encrypt_asm(
; unsigned char *c,
; const unsigned char *m,
; unsigned long long mlen,
; const unsigned char *ad,
; unsigned long long adlen,
; const unsigned char *npub,
; const unsigned char *k
; )
;
; unsigned char *c, is passed in r24:r25
; const unsigned char *m, is passed in r22:r23
; unsigned long long mlen, is passed in r20:r21, only LSB (r20) is used
; const unsigned char *ad, is passed in r18:r19
; unsigned long long adlen, is passed in r16:r17, only LSB (r16) is used
; const unsigned char *npub, is passed in r14:r15
; const unsigned char *k is passed in r12:r13
.global crypto_aead_encrypt_asm
crypto_aead_encrypt_asm:
PUSH_ALL
ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
st X+, r25 ;store cipher address in SRAM_MESSAGE_OUT_ADDR
st X+, r24
st X+, r23 ;store message address in SRAM_MESSAGE_IN_ADDR
st X+, r22
st X+, r21 ;store message length in SRAM_MESSAGE_IN_LEN
st X+, r20
st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR
st X+, r18
st X+, r17 ;store associated data length in SRAM_ADLEN
st X+, r16
st X+, r15 ;store nonce address in SRAM_NONCE_ADDR
st X+, r14
st X+, r13 ;store key address in SRAM_KEY_ADDR
st X+, r12
mov radlen, r16
mov mclen, r20
rcall Initialization
ldi rn, NR_i
ldi rate, RATE_INBYTES
ldi AEDH, 0b000 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
lds YH, SRAM_ASSOCIATED_DATA_ADDR
lds YL, SRAM_ASSOCIATED_DATA_ADDR + 1
rcall AUTH
rcall AddDomain
ldi AEDH, 0b001 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
lds YH, SRAM_MESSAGE_IN_ADDR
lds YL, SRAM_MESSAGE_IN_ADDR + 1
lds ZH, SRAM_MESSAGE_OUT_ADDR
lds ZL, SRAM_MESSAGE_OUT_ADDR + 1
rcall ENC
rcall Finalization
POP_ALL
ret
; int crypto_aead_decrypt_asm(
; unsigned char *m,
; const unsigned char *c,
; unsigned long long clen,
; const unsigned char *ad,
; unsigned long long adlen,
; const unsigned char *npub,
; const unsigned char *k
; )
;
; unsigned char *m, is passed in r24:r25
; const unsigned char *c, is passed in r22:r23
; unsigned long long clen, is passed in r20:r21, only LSB (r20) is used
; const unsigned char *ad, is passed in r18:r19
; unsigned long long adlen, is passed in r16:r17, only LSB (r16) is used
; const unsigned char *npub, is passed in r14:r15
; const unsigned char *k is passed in r12:r13
.global crypto_aead_decrypt_asm
crypto_aead_decrypt_asm:
PUSH_ALL
ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
st X+, r25 ;store message address in SRAM_MESSAGE_OUT_ADDR
st X+, r24
st X+, r23 ;store cipher address in SRAM_MESSAGE_IN_ADDR
st X+, r22
st X+, r21 ;store cipher length in SRAM_MESSAGE_IN_LEN
st X+, r20
st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR
st X+, r18
st X+, r17 ;store associated data length in SRAM_ADLEN
st X+, r16
st X+, r15 ;store nonce address in SRAM_NONCE_ADDR
st X+, r14
st X+, r13 ;store key address in SRAM_KEY_ADDR
st X+, r12
mov radlen, r16
mov mclen, r20
rcall Initialization
ldi rn, NR_i
ldi rate, RATE_INBYTES
ldi AEDH, 0b000 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
lds YH, SRAM_ASSOCIATED_DATA_ADDR
lds YL, SRAM_ASSOCIATED_DATA_ADDR + 1
rcall AUTH
rcall AddDomain
ldi AEDH, 0b011 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
lds YH, SRAM_MESSAGE_IN_ADDR
lds YL, SRAM_MESSAGE_IN_ADDR + 1
lds ZH, SRAM_MESSAGE_OUT_ADDR
lds ZL, SRAM_MESSAGE_OUT_ADDR + 1
rcall ENC
ldi ZH, hi8(SRAM_ADDITIONAL)
ldi ZL, lo8(SRAM_ADDITIONAL)
rcall Finalization
sbiw ZL, CRYPTO_ABYTES
ldi cnt0, CRYPTO_ABYTES
compare_tag:
ld tmp0, Z+
ld tmp1, Y+
cp tmp0, tmp1
brne return_tag_not_match
dec cnt0
brne compare_tag
rjmp return_tag_match
return_tag_not_match:
ldi r25, 0xFF
ldi r24, 0xFF
rjmp crypto_aead_decrypt_end
return_tag_match:
clr r25
clr r24
crypto_aead_decrypt_end:
POP_ALL
ret
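; Hedged note on the tag check above: Finalization squeezes the recomputed tag
; into SRAM_ADDITIONAL (Z), which is then compared byte by byte against the
; received tag that follows the ciphertext (Y); roughly, in C:
;     for (i = 0; i < CRYPTO_ABYTES; i++)
;         if (computed[i] != received[i]) return -1;   /* r25:r24 = 0xFFFF */
;     return 0;                                        /* r25:r24 = 0x0000 */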
; #ifdef CRYPTO_AEAD
#endif
#ifdef CRYPTO_HASH
; void crypto_hash_asm(
; unsigned char *out,
; const unsigned char *in,
; unsigned long long inlen
; )
;
; unsigned char *out, is passed in r24:r25
; const unsigned char *in, is passed in r22:r23
; unsigned long long inlen, is passed in r20:r21, only LSB (r20) is used
.global crypto_hash_asm
crypto_hash_asm:
PUSH_ALL
ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
st X+, r25 ;store digest output address in SRAM_MESSAGE_OUT_ADDR
st X+, r24
st X+, r23 ;store input address in SRAM_MESSAGE_IN_ADDR
st X+, r22
st X+, r21 ;store input length in SRAM_MESSAGE_IN_LEN
st X+, r20
mov mclen, r20
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
#if (STATE_INBITS==384) && (HASH_RATE_INBITS==128)
ldi cnt0, STATE_INBYTES - 1
#else
ldi cnt0, STATE_INBYTES
#endif
clr tmp0
zero_state:
st X+, tmp0
dec cnt0
brne zero_state
#if (STATE_INBITS==384) && (HASH_RATE_INBITS==128)
ldi tmp0, S384_R192_BITS
st X+, tmp0
#endif
ldi rn, NR_h
ldi AEDH, 0b100
HASH_ABSORBING:
mov radlen, mclen
tst radlen
breq EMPTY_M
ldi rate, HASH_RATE_INBYTES
lds YH, SRAM_MESSAGE_IN_ADDR
lds YL, SRAM_MESSAGE_IN_ADDR + 1
rcall AUTH
rjmp HASH_SQUEEZING
EMPTY_M:
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
ldi tmp0, PAD_BITS
ld tmp1, X
eor tmp1, tmp0
st X, tmp1
rcall Permutation
HASH_SQUEEZING:
ldi rate, HASH_SQUEEZE_RATE_INBYTES
lds ZH, SRAM_MESSAGE_OUT_ADDR
lds ZL, SRAM_MESSAGE_OUT_ADDR + 1
ldi tcnt, CRYPTO_BYTES
SQUEEZING_loop:
rcall EXTRACT_from_State
subi tcnt, HASH_SQUEEZE_RATE_INBYTES
breq HASH_SQUEEZING_end
rcall Permutation
rjmp SQUEEZING_loop
HASH_SQUEEZING_end:
POP_ALL
ret
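; Hedged outline of crypto_hash_asm above (comments only):
;     zero the state (384-bit state / 128-bit hash rate: last byte = 0x80);
;     absorb the input in HASH_RATE_INBYTES blocks via AUTH (0x01 padding);
;     squeeze CRYPTO_BYTES of digest in HASH_SQUEEZE_RATE_INBYTES blocks,
;     running Permutation (NR_h rounds) between successive squeezes.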
#endif
; Byte Order In AVR 8:
; KNOT-AEAD(128, 256, 64):
; N[ 0] AEAD_State[ 0] | Message[ 0] Perm_row_0[0] 0 Tag[ 0]
; N[ 1] AEAD_State[ 1] | Message[ 1] Perm_row_0[1] 0 Tag[ 1]
; N[ 2] AEAD_State[ 2] | Message[ 2] Perm_row_0[2] 0 Tag[ 2]
; N[ 3] AEAD_State[ 3] | Message[ 3] Perm_row_0[3] 0 Tag[ 3]
; N[ 4] AEAD_State[ 4] | Message[ 4] 0x01 Perm_row_0[4] 0 Tag[ 4]
; N[ 5] AEAD_State[ 5] | Message[ 5] 0x00 Perm_row_0[5] 0 Tag[ 5]
; N[ 6] AEAD_State[ 6] | Message[ 6] 0x00 Perm_row_0[6] 0 Tag[ 6]
; N[ 7] AEAD_State[ 7] | Message[ 7] 0x00 Perm_row_0[7] <<< 0 Tag[ 7]
; N[ 8] AEAD_State[ 8] | Perm_row_1[0] 1
; N[ 9] AEAD_State[ 9] | Perm_row_1[1] 1
; N[10] AEAD_State[10] | Perm_row_1[2] 1
; N[11] AEAD_State[11] | Perm_row_1[3] 1
; N[12] AEAD_State[12] | Perm_row_1[4] 1
; N[13] AEAD_State[13] | Perm_row_1[5] 1
; N[14] AEAD_State[14] | Perm_row_1[6] 1
; N[15] AEAD_State[15] | Perm_row_1[7] <<< 1
; K[ 0] AEAD_State[16] | Perm_row_2[0] 8
; K[ 1] AEAD_State[17] | Perm_row_2[1] 8
; K[ 2] AEAD_State[18] | Perm_row_2[2] 8
; K[ 3] AEAD_State[19] | Perm_row_2[3] 8
; K[ 4] AEAD_State[20] | Perm_row_2[4] 8
; K[ 5] AEAD_State[21] | Perm_row_2[5] 8
; K[ 6] AEAD_State[22] | Perm_row_2[6] 8
; K[ 7] AEAD_State[23] | Perm_row_2[7] <<< 8
; K[ 8] AEAD_State[24] | Perm_row_3[0] 25
; K[ 9] AEAD_State[25] | Perm_row_3[1] 25
; K[10] AEAD_State[26] | Perm_row_3[2] 25
; K[11] AEAD_State[27] | Perm_row_3[3] 25
; K[12] AEAD_State[28] | Perm_row_3[4] 25
; K[13] AEAD_State[29] | Perm_row_3[5] 25
; K[14] AEAD_State[30] | Perm_row_3[6] 25
; K[15] AEAD_State[31] | ^0x80 Perm_row_3[7] <<< 25
;
;
; KNOT-AEAD(128, 384, 192):
; Initialization
; N[ 0] AEAD_State[ 0] | Message[ 0] Perm_row_0[ 0] 0 Tag[ 0]
; N[ 1] AEAD_State[ 1] | Message[ 1] Perm_row_0[ 1] 0 Tag[ 1]
; N[ 2] AEAD_State[ 2] | Message[ 2] Perm_row_0[ 2] 0 Tag[ 2]
; N[ 3] AEAD_State[ 3] | Message[ 3] Perm_row_0[ 3] 0 Tag[ 3]
; N[ 4] AEAD_State[ 4] | Message[ 4] 0x01 Perm_row_0[ 4] 0 Tag[ 4]
; N[ 5] AEAD_State[ 5] | Message[ 5] 0x00 Perm_row_0[ 5] 0 Tag[ 5]
; N[ 6] AEAD_State[ 6] | Message[ 6] 0x00 Perm_row_0[ 6] 0 Tag[ 6]
; N[ 7] AEAD_State[ 7] | Message[ 7] 0x00 Perm_row_0[ 7] 0 Tag[ 7]
; N[ 8] AEAD_State[ 8] | Message[ 8] 0x00 Perm_row_0[ 8] 0 Tag[ 8]
; N[ 9] AEAD_State[ 9] | Message[ 9] 0x00 Perm_row_0[ 9] 0 Tag[ 9]
; N[10] AEAD_State[10] | Message[10] 0x00 Perm_row_0[10] 0 Tag[10]
; N[11] AEAD_State[11] | Message[11] 0x00 Perm_row_0[11] <<< 0 Tag[11]
; N[12] AEAD_State[12] | Message[12] 0x00 Perm_row_1[ 0] 1 Tag[12]
; N[13] AEAD_State[13] | Message[13] 0x00 Perm_row_1[ 1] 1 Tag[13]
; N[14] AEAD_State[14] | Message[14] 0x00 Perm_row_1[ 2] 1 Tag[14]
; N[15] AEAD_State[15] | Message[15] 0x00 Perm_row_1[ 3] 1 Tag[15]
; K[ 0] AEAD_State[16] | Message[16] 0x00 Perm_row_1[ 4] 1
; K[ 1] AEAD_State[17] | Message[17] 0x00 Perm_row_1[ 5] 1
; K[ 2] AEAD_State[18] | Message[18] 0x00 Perm_row_1[ 6] 1
; K[ 3] AEAD_State[19] | Message[19] 0x00 Perm_row_1[ 7] 1
; K[ 4] AEAD_State[20] | Message[20] 0x00 Perm_row_1[ 8] 1
; K[ 5] AEAD_State[21] | Message[21] 0x00 Perm_row_1[ 9] 1
; K[ 6] AEAD_State[22] | Message[22] 0x00 Perm_row_1[10] 1
; K[ 7] AEAD_State[23] | Message[23] 0x00 Perm_row_1[11] <<< 1
; K[ 8] AEAD_State[24] | Perm_row_2[ 0] 8
; K[ 9] AEAD_State[25] | Perm_row_2[ 1] 8
; K[10] AEAD_State[26] | Perm_row_2[ 2] 8
; K[11] AEAD_State[27] | Perm_row_2[ 3] 8
; K[12] AEAD_State[28] | Perm_row_2[ 4] 8
; K[13] AEAD_State[29] | Perm_row_2[ 5] 8
; K[14] AEAD_State[30] | Perm_row_2[ 6] 8
; K[15] AEAD_State[31] | Perm_row_2[ 7] 8
; 0x00 AEAD_State[32] | Perm_row_2[ 8] 8
; 0x00 AEAD_State[33] | Perm_row_2[ 9] 8
; 0x00 AEAD_State[34] | Perm_row_2[10] 8
; 0x00 AEAD_State[35] | Perm_row_2[11] <<< 8
; 0x00 AEAD_State[36] | Perm_row_3[ 0] 55
; 0x00 AEAD_State[37] | Perm_row_3[ 1] 55
; 0x00 AEAD_State[38] | Perm_row_3[ 2] 55
; 0x00 AEAD_State[39] | Perm_row_3[ 3] 55
; 0x00 AEAD_State[40] | Perm_row_3[ 4] 55
; 0x00 AEAD_State[41] | Perm_row_3[ 5] 55
; 0x00 AEAD_State[42] | Perm_row_3[ 6] 55
; 0x00 AEAD_State[43] | Perm_row_3[ 7] 55
; 0x00 AEAD_State[44] | Perm_row_3[ 8] 55
; 0x00 AEAD_State[45] | Perm_row_3[ 9] 55
; 0x00 AEAD_State[46] | Perm_row_3[10] 55
; 0x00 ^0x80 AEAD_State[47] | ^0x80 Perm_row_3[11] <<< 55
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
#define x10 r0
#define x11 r1
#define x12 r2
#define x13 r3
#define x14 r4
#define x15 r5
#define x16 r6
#define x17 r7
; an intentional arrangement of registers to facilitate movw
#define x20 r8
#define x21 r10
#define x22 r12
#define x23 r14
#define x24 r9
#define x25 r11
#define x26 r13
#define x27 r15
; an intentional arrangement of registers to facilitate movw
#define x30 r16
#define x35 r18
#define x32 r20
#define x37 r22
#define x34 r17
#define x31 r19
#define x36 r21
#define x33 r23
#define t0j r24
#define t1j r25
#define x0j r27
#include "assist.h"
.macro Sbox i0, i1, i2, i3
mov t0j, \i1
com \i0
and \i1, \i0
eor \i1, \i2
or \i2, t0j
eor \i0, \i3
eor \i2, \i0
eor t0j, \i3
and \i0, \i1
eor \i3, \i1
eor \i0, t0j
and t0j, \i2
eor \i1, t0j
.endm
Permutation:
PUSH_CONFLICT
mov rcnt, rn
ldi YH, hi8(SRAM_STATE + ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + ROW_INBYTES)
ld x10, Y+
ld x11, Y+
ld x12, Y+
ld x13, Y+
ld x14, Y+
ld x15, Y+
ld x16, Y+
ld x17, Y+
ld x20, Y+
ld x21, Y+
ld x22, Y+
ld x23, Y+
ld x24, Y+
ld x25, Y+
ld x26, Y+
ld x27, Y+
ld x30, Y+
ld x31, Y+
ld x32, Y+
ld x33, Y+
ld x34, Y+
ld x35, Y+
ld x36, Y+
ld x37, Y+
#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[2] = 1 for HASH
rjmp For_Hash
For_AEAD:
ldi ZL, lo8(RC_LFSR6)
ldi ZH, hi8(RC_LFSR6)
rjmp round_loop_start
For_Hash:
ldi ZL, lo8(RC_LFSR7)
ldi ZH, hi8(RC_LFSR7)
#elif defined(CRYPTO_AEAD)
ldi ZL, lo8(RC_LFSR6)
ldi ZH, hi8(RC_LFSR6)
#else
ldi ZL, lo8(RC_LFSR7)
ldi ZH, hi8(RC_LFSR7)
#endif
round_loop_start:
; AddRC
lpm t0j, Z+
ldi YH, hi8(SRAM_STATE)
ldi YL, lo8(SRAM_STATE)
ld x0j, Y
eor x0j, t0j
; SubColumns
Sbox x0j, x10, x20, x30
st Y+, x0j
ld x0j, Y
Sbox x0j, x11, x21, x31
st Y+, x0j
ld x0j, Y
Sbox x0j, x12, x22, x32
st Y+, x0j
ld x0j, Y
Sbox x0j, x13, x23, x33
st Y+, x0j
ld x0j, Y
Sbox x0j, x14, x24, x34
st Y+, x0j
ld x0j, Y
Sbox x0j, x15, x25, x35
st Y+, x0j
ld x0j, Y
Sbox x0j, x16, x26, x36
st Y+, x0j
ld x0j, Y
Sbox x0j, x17, x27, x37
st Y, x0j
; ShiftRows
; <<< 1
mov t0j, x17
rol t0j
rol x10
rol x11
rol x12
rol x13
rol x14
rol x15
rol x16
rol x17
; <<< 8
; 7 6 5 4 3 2 1 0 => 6 5 4 3 2 1 0 7
;mov t0j, x27
;mov x27, x26
;mov x26, x25
;mov x25, x24
;mov x24, x23
;mov x23, x22
;mov x22, x21
;mov x21, x20
;mov x20, t0j
; an intentional arrangement of registers to facilitate movw
movw t0j, x23 ; t1j:t0j <= x27:x23
movw x23, x22 ; x27:x23 <= x26:x22
movw x22, x21 ; x26:x22 <= x25:x21
movw x21, x20 ; x25:x21 <= x24:x20
mov x20, t1j ; x20 <= t1j
mov x24, t0j ; x24 <= t0j
; <<< 1
mov t0j, x37
rol t0j
rol x30
rol x31
rol x32
rol x33
rol x34
rol x35
rol x36
rol x37
; <<< 24
; 7 6 5 4 3 2 1 0 => 4 3 2 1 0 7 6 5
;mov t0j, x30
;mov x30, x35
;mov x35, x32
;mov x32, x37
;mov x37, x34
;mov x34, x31
;mov x31, x36
;mov x36, x33
;mov x33, t0j
; an intentional arrangement of registers to facilitate movw
;x30 r16
;x35 r18
;x32 r20
;x37 r22
;x34 r17
;x31 r19
;x36 r21
;x33 r23
movw t0j, x30 ; t1j:t0j <= x34:x30
movw x30, x35 ; x34:x30 <= x31:x35
movw x35, x32 ; x31:x35 <= x36:x32
movw x32, x37 ; x36:x32 <= x33:x37
mov x37, t1j ; x37 <= x34
mov x33, t0j ; x33 <= x30
dec rcnt
breq round_loop_end
jmp round_loop_start
round_loop_end:
ldi YH, hi8(SRAM_STATE + ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + ROW_INBYTES)
st Y+, x10
st Y+, x11
st Y+, x12
st Y+, x13
st Y+, x14
st Y+, x15
st Y+, x16
st Y+, x17
st Y+, x20
st Y+, x21
st Y+, x22
st Y+, x23
st Y+, x24
st Y+, x25
st Y+, x26
st Y+, x27
st Y+, x30
st Y+, x31
st Y+, x32
st Y+, x33
st Y+, x34
st Y+, x35
st Y+, x36
st Y+, x37
POP_CONFLICT
ret
.section .text
#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
RC_LFSR6:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x21, 0x03, 0x06
.byte 0x0c, 0x18, 0x31, 0x22, 0x05, 0x0a, 0x14, 0x29
.byte 0x13, 0x27, 0x0f, 0x1e, 0x3d, 0x3a, 0x34, 0x28
.byte 0x11, 0x23, 0x07, 0x0e, 0x1c, 0x39, 0x32, 0x24
.byte 0x09, 0x12, 0x25, 0x0b, 0x16, 0x2d, 0x1b, 0x37
.byte 0x2e, 0x1d, 0x3b, 0x36, 0x2c, 0x19, 0x33, 0x26
.byte 0x0d, 0x1a, 0x35, 0x2a, 0x15, 0x2b, 0x17, 0x2f
.byte 0x1f, 0x3f, 0x3e, 0x3c, 0x38, 0x30, 0x20, 0x00
RC_LFSR7:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03
.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a
.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c
.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b
.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a
.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f
.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43
.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09
.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36
.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37
.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31
.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25
.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c
.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a
.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f
.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00
#elif defined(CRYPTO_AEAD)
RC_LFSR6:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x21, 0x03, 0x06
.byte 0x0c, 0x18, 0x31, 0x22, 0x05, 0x0a, 0x14, 0x29
.byte 0x13, 0x27, 0x0f, 0x1e, 0x3d, 0x3a, 0x34, 0x28
.byte 0x11, 0x23, 0x07, 0x0e, 0x1c, 0x39, 0x32, 0x24
.byte 0x09, 0x12, 0x25, 0x0b, 0x16, 0x2d, 0x1b, 0x37
.byte 0x2e, 0x1d, 0x3b, 0x36, 0x2c, 0x19, 0x33, 0x26
.byte 0x0d, 0x1a, 0x35, 0x2a, 0x15, 0x2b, 0x17, 0x2f
.byte 0x1f, 0x3f, 0x3e, 0x3c, 0x38, 0x30, 0x20, 0x00
#else
RC_LFSR7:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03
.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a
.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c
.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b
.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a
.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f
.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43
.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09
.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36
.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37
.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31
.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25
.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c
.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a
.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f
.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00
#endif
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
; an intentional arrangement of registers to facilitate movw
#define x20 r0
#define x21 r2
#define x22 r4
#define x23 r6
#define x24 r8
#define x25 r10
#define x26 r1
#define x27 r3
#define x28 r5
#define x29 r7
#define x2a r9
#define x2b r11
; an intentional arrangement of registers to facilitate movw
#define x30 r22
#define x35 r20
#define x3a r18
#define x33 r16
#define x38 r14
#define x31 r12
#define x36 r23
#define x3b r21
#define x34 r19
#define x39 r17
#define x32 r15
#define x37 r13
#define t0j r24
#define t1j r25
#define x0j r25
#define x1j r27
#include "assist.h"
.macro Sbox i0, i1, i2, i3
ldi t0j, 0xFF
eor \i0, t0j
mov t0j, \i1
and \i1, \i0
eor \i1, \i2
or \i2, t0j
eor \i0, \i3
eor \i2, \i0
eor t0j, \i3
and \i0, \i1
eor \i3, \i1
eor \i0, t0j
and t0j, \i2
eor \i1, t0j
.endm
.macro OneColumn i0, i1, i2, i3
ld \i0, Y
ldd \i1, Y + ROW_INBYTES
Sbox \i0, \i1, \i2, \i3
st Y+, \i0
rol \i1 ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES -1, \i1
.endm
Permutation:
PUSH_CONFLICT
mov rcnt, rn
ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES)
ld x20, Y+
ld x21, Y+
ld x22, Y+
ld x23, Y+
ld x24, Y+
ld x25, Y+
ld x26, Y+
ld x27, Y+
ld x28, Y+
ld x29, Y+
ld x2a, Y+
ld x2b, Y+
ld x30, Y+
ld x31, Y+
ld x32, Y+
ld x33, Y+
ld x34, Y+
ld x35, Y+
ld x36, Y+
ld x37, Y+
ld x38, Y+
ld x39, Y+
ld x3a, Y+
ld x3b, Y+
ldi ZL, lo8(RC_LFSR7)
ldi ZH, hi8(RC_LFSR7)
round_loop_start:
; AddRC
lpm t0j, Z+
ldi YH, hi8(SRAM_STATE)
ldi YL, lo8(SRAM_STATE)
ld x0j, Y
eor x0j, t0j
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, x20, x30
st Y+, x0j
lsl x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES -1, x1j
OneColumn x0j, x1j, x21, x31
OneColumn x0j, x1j, x22, x32
OneColumn x0j, x1j, x23, x33
OneColumn x0j, x1j, x24, x34
OneColumn x0j, x1j, x25, x35
OneColumn x0j, x1j, x26, x36
OneColumn x0j, x1j, x27, x37
OneColumn x0j, x1j, x28, x38
OneColumn x0j, x1j, x29, x39
OneColumn x0j, x1j, x2a, x3a
OneColumn x0j, x1j, x2b, x3b
ld x1j, Y
eor t0j, t0j
adc x1j, t0j
st Y, x1j
; b a 9 8 7 6 5 4 3 2 1 0
; -- -- -- -- -- -- -- -- -- -- -- x- 0
; -- -- -- -- -- -- -- -- -- -- -- x' 0
; -- -- -- -- -- -- -- -- -- -- x- -- 1
; -- -- -- -- x' -- -- -- -- -- -- -- 7
; 4 3 2 1 0 b a 9 8 7 6 5
; ShiftRows -- the last two rows
; <<< 8
; b a 9 8 7 6 5 4 3 2 1 0 => a 9 8 7 6 5 4 3 2 1 0 b
movw t0j, x25 ; t1j:t0j <= x2b:x25
movw x25, x24 ; x2b:x25 <= x2a:x24
movw x24, x23 ; x2a:x24 <= x29:x23
movw x23, x22 ; x29:x23 <= x28:x22
movw x22, x21 ; x28:x22 <= x27:x21
movw x21, x20 ; x27:x21 <= x26:x20
mov x26, t0j ; x26 <= x25
mov x20, t1j ; x20 <= x2b
; >>> 1
mov t0j, x3b
ror t0j
ror x3a
ror x39
ror x38
ror x37
ror x36
ror x35
ror x34
ror x33
ror x32
ror x31
ror x30
ror x3b
; <<< 56
; b a 9 8 7 6 5 4 3 2 1 0 => 4 3 2 1 0 b a 9 8 7 6 5
; mov x3j, x30
; mov x30, x35
; mov x35, x3a
; mov x3a, x33
; mov x33, x38
; mov x38, x31
; mov x31, x36
; mov x36, x3b
; mov x3b, x34
; mov x34, x39
; mov x39, x32
; mov x32, x37
; mov x37, x3j
; an intentional arrangement of registers to facilitate movw
; x30 r22
; x35 r20
; x3a r18
; x33 r16
; x38 r14
; x31 r12
; x36 r23
; x3b r21
; x34 r19
; x39 r17
; x32 r15
; x37 r13
movw t0j, x30 ; t1j:t0j <= x36:x30
movw x30, x35 ; x36:x30 <= x3b:x35
movw x35, x3a ; x3b:x35 <= x34:x3a
movw x3a, x33 ; x34:x3a <= x39:x33
movw x33, x38 ; x39:x33 <= x32:x38
movw x38, x31 ; x32:x38 <= x37:x31
mov x31, t1j ; x31 <= x36
mov x37, t0j ; x37 <= x30
dec rcnt
breq round_loop_end
jmp round_loop_start
round_loop_end:
ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES)
st Y+, x20
st Y+, x21
st Y+, x22
st Y+, x23
st Y+, x24
st Y+, x25
st Y+, x26
st Y+, x27
st Y+, x28
st Y+, x29
st Y+, x2a
st Y+, x2b
st Y+, x30
st Y+, x31
st Y+, x32
st Y+, x33
st Y+, x34
st Y+, x35
st Y+, x36
st Y+, x37
st Y+, x38
st Y+, x39
st Y+, x3a
st Y+, x3b
POP_CONFLICT
ret
RC_LFSR7:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03
.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a
.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c
.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b
.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a
.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f
.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43
.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09
.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36
.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37
.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31
.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25
.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c
.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a
.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f
.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
#define x20 r0
#define x22 r2
#define x24 r4
#define x26 r6
#define x28 r1
#define x2a r3
#define x2c r5
#define x2e r7
#define x30 r8
#define x3d r10
#define x3a r12
#define x37 r14
#define x34 r16
#define x31 r18
#define x3e r20
#define x3b r22
#define x38 r9
#define x35 r11
#define x32 r13
#define x3f r15
#define x3c r17
#define x39 r19
#define x36 r21
#define x33 r23
#define t0j r24
#define t1j r25
#define x0j r25
#define x1j r27
#define x2j r26
#include "assist.h"
.macro Sbox i0, i1, i2, i3
ldi t0j, 0xFF
eor \i0, t0j
mov t0j, \i1
and \i1, \i0
eor \i1, \i2
or \i2, t0j
eor \i0, \i3
eor \i2, \i0
eor t0j, \i3
and \i0, \i1
eor \i3, \i1
eor \i0, t0j
and t0j, \i2
eor \i1, t0j
.endm
.macro TwoColumns i2_e, i3_e, i3_o
; column 2i
ld x0j, Y
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, \i2_e, \i3_e
st Y+, x0j
rol x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES - 1, x1j
; column 2i+1
ld x0j, Y
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, x2j, \i3_o
st Y+, x0j
rol x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES - 1, x1j
ldd t0j, Y + 2 * ROW_INBYTES + 1
std Y + 2 * ROW_INBYTES + 1, x2j
mov x2j, t0j
.endm
Permutation:
PUSH_CONFLICT
mov rcnt, rn
push rcnt
ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES)
ldd x20, Y + 0x00
ldd x22, Y + 0x02
ldd x24, Y + 0x04
ldd x26, Y + 0x06
ldd x28, Y + 0x08
ldd x2a, Y + 0x0a
ldd x2c, Y + 0x0c
ldd x2e, Y + 0x0e
adiw YL, ROW_INBYTES
ld x30, Y+
ld x31, Y+
ld x32, Y+
ld x33, Y+
ld x34, Y+
ld x35, Y+
ld x36, Y+
ld x37, Y+
ld x38, Y+
ld x39, Y+
ld x3a, Y+
ld x3b, Y+
ld x3c, Y+
ld x3d, Y+
ld x3e, Y+
ld x3f, Y+
#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[2] = 1 for HASH
rjmp For_Hash
For_AEAD:
ldi ZL, lo8(RC_LFSR7)
ldi ZH, hi8(RC_LFSR7)
rjmp round_loop_start
For_Hash:
ldi ZL, lo8(RC_LFSR8)
ldi ZH, hi8(RC_LFSR8)
#elif defined(CRYPTO_AEAD)
ldi ZL, lo8(RC_LFSR7)
ldi ZH, hi8(RC_LFSR7)
#else
ldi ZL, lo8(RC_LFSR8)
ldi ZH, hi8(RC_LFSR8)
#endif
round_loop_start:
; AddRC
lpm t0j, Z+
ldi YH, hi8(SRAM_STATE)
ldi YL, lo8(SRAM_STATE)
; column 0
ld x0j, Y
eor x0j, t0j
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, x20, x30
st Y+, x0j
lsl x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES - 1, x1j
; column 1
ld x0j, Y
ldd x1j, Y + ROW_INBYTES
ldd x2j, Y + 2 * ROW_INBYTES
Sbox x0j, x1j, x2j, x31
st Y+, x0j
rol x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES - 1, x1j
ldd t0j, Y + 2 * ROW_INBYTES + 1
std Y + 2 * ROW_INBYTES + 1, x2j
mov x2j, t0j
; column 2, 3
TwoColumns x22, x32, x33
; column 4, 5
TwoColumns x24, x34, x35
; column 6, 7
TwoColumns x26, x36, x37
; column 8, 9
TwoColumns x28, x38, x39
; column 10, 11
TwoColumns x2a, x3a, x3b
; column 12, 13
TwoColumns x2c, x3c, x3d
; column 14
ld x0j, Y
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, x2e, x3e
st Y+, x0j
rol x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES - 1, x1j
; column 15
ld x0j, Y
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, x2j, x3f
st Y+, x0j
rol x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES - 1, x1j
ld x1j, Y
eor t0j, t0j
adc x1j, t0j
st Y, x1j
std Y + ROW_INBYTES + 1, x2j
; f e d c b a 9 8 7 6 5 4 3 2 1 0
; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x- 0
; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x' 0
; -- -- -- -- -- -- -- -- -- -- -- -- -- x- -- -- 2
; -- -- -- -- -- -- -- -- -- -- -- -- x' -- -- -- 3
; c b a 9 8 7 6 5 4 3 2 1 0 f e d
; x2e x2c x2a x28 x26 x24 x22 x20 => x2c x2a x28 x26 x24 x22 x20 x2e
;mov t0j, x2e
;mov x2e, x2c
;mov x2c, x2a
;mov x2a, x28
;mov x28, x26
;mov x26, x24
;mov x24, x22
;mov x22, x20
;mov x20, t0j
; an intentional arrangement of registers to facilitate movw
movw t0j, x26 ; t1j:t0j <= x2e:x26
movw x26, x24 ; x2e:x26 <= x2c:x24
movw x24, x22 ; x2c:x24 <= x2a:x22
movw x22, x20 ; x2a:x22 <= x28:x20
mov x20, t1j ; x20 <= t1j
mov x28, t0j ; x28 <= t0j
; <<< 1
mov t0j, x3f
rol t0j
rol x30
rol x31
rol x32
rol x33
rol x34
rol x35
rol x36
rol x37
rol x38
rol x39
rol x3a
rol x3b
rol x3c
rol x3d
rol x3e
rol x3f
; <<< 24
; f e d c b a 9 8 7 6 5 4 3 2 1 0 =>
; c b a 9 8 7 6 5 4 3 2 1 0 f e d
; mov x3j, x30
; mov x30, x3d
; mov x3d, x3a
; mov x3a, x37
; mov x37, x34
; mov x34, x31
; mov x31, x3e
; mov x3e, x3b
; mov x3b, x38
; mov x38, x35
; mov x35, x32
; mov x32, x3f
; mov x3f, x3c
; mov x3c, x39
; mov x39, x36
; mov x36, x33
; mov x33, x3j
; an intentional arrangement of registers to facilitate movw
; x30 r8
; x3d r10
; x3a r12
; x37 r14
; x34 r16
; x31 r18
; x3e r20
; x3b r22
; x38 r9
; x35 r11
; x32 r13
; x3f r15
; x3c r17
; x39 r19
; x36 r21
; x33 r23
movw t0j, x30 ; t1j:t0j <= x38:x30
movw x30, x3d ; x38:x30 <= x35:x3d
movw x3d, x3a ; x35:x3d <= x32:x3a
movw x3a, x37 ; x32:x3a <= x3f:x37
movw x37, x34 ; x3f:x37 <= x3c:x34
movw x34, x31 ; x3c:x34 <= x39:x31
movw x31, x3e ; x39:x31 <= x36:x3e
movw x3e, x3b ; x36:x3e <= x33:x3b
mov x3b, t1j ; x3b <= x38
mov x33, t0j ; x33 <= x30
pop rcnt
dec rcnt
push rcnt
breq round_loop_end
rjmp round_loop_start
round_loop_end:
pop rcnt
ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES)
std Y + 0x00, x20
std Y + 0x02, x22
std Y + 0x04, x24
std Y + 0x06, x26
std Y + 0x08, x28
std Y + 0x0a, x2a
std Y + 0x0c, x2c
std Y + 0x0e, x2e
adiw YL, ROW_INBYTES
st Y+, x30
st Y+, x31
st Y+, x32
st Y+, x33
st Y+, x34
st Y+, x35
st Y+, x36
st Y+, x37
st Y+, x38
st Y+, x39
st Y+, x3a
st Y+, x3b
st Y+, x3c
st Y+, x3d
st Y+, x3e
st Y+, x3f
POP_CONFLICT
ret
.section .text
#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
RC_LFSR7:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03
.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a
.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c
.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b
.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a
.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f
.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43
.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09
.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36
.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37
.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31
.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25
.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c
.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a
.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f
.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00
RC_LFSR8:
.byte 0x01, 0x02, 0x04, 0x08, 0x11, 0x23, 0x47, 0x8e
.byte 0x1c, 0x38, 0x71, 0xe2, 0xc4, 0x89, 0x12, 0x25
.byte 0x4b, 0x97, 0x2e, 0x5c, 0xb8, 0x70, 0xe0, 0xc0
.byte 0x81, 0x03, 0x06, 0x0c, 0x19, 0x32, 0x64, 0xc9
.byte 0x92, 0x24, 0x49, 0x93, 0x26, 0x4d, 0x9b, 0x37
.byte 0x6e, 0xdc, 0xb9, 0x72, 0xe4, 0xc8, 0x90, 0x20
.byte 0x41, 0x82, 0x05, 0x0a, 0x15, 0x2b, 0x56, 0xad
.byte 0x5b, 0xb6, 0x6d, 0xda, 0xb5, 0x6b, 0xd6, 0xac
.byte 0x59, 0xb2, 0x65, 0xcb, 0x96, 0x2c, 0x58, 0xb0
.byte 0x61, 0xc3, 0x87, 0x0f, 0x1f, 0x3e, 0x7d, 0xfb
.byte 0xf6, 0xed, 0xdb, 0xb7, 0x6f, 0xde, 0xbd, 0x7a
.byte 0xf5, 0xeb, 0xd7, 0xae, 0x5d, 0xba, 0x74, 0xe8
.byte 0xd1, 0xa2, 0x44, 0x88, 0x10, 0x21, 0x43, 0x86
.byte 0x0d, 0x1b, 0x36, 0x6c, 0xd8, 0xb1, 0x63, 0xc7
.byte 0x8f, 0x1e, 0x3c, 0x79, 0xf3, 0xe7, 0xce, 0x9c
.byte 0x39, 0x73, 0xe6, 0xcc, 0x98, 0x31, 0x62, 0xc5
.byte 0x8b, 0x16, 0x2d, 0x5a, 0xb4, 0x69, 0xd2, 0xa4
.byte 0x48, 0x91, 0x22, 0x45, 0x8a, 0x14, 0x29, 0x52
.byte 0xa5, 0x4a, 0x95, 0x2a, 0x54, 0xa9, 0x53, 0xa7
.byte 0x4e, 0x9d, 0x3b, 0x77, 0xee, 0xdd, 0xbb, 0x76
.byte 0xec, 0xd9, 0xb3, 0x67, 0xcf, 0x9e, 0x3d, 0x7b
.byte 0xf7, 0xef, 0xdf, 0xbf, 0x7e, 0xfd, 0xfa, 0xf4
.byte 0xe9, 0xd3, 0xa6, 0x4c, 0x99, 0x33, 0x66, 0xcd
.byte 0x9a, 0x35, 0x6a, 0xd4, 0xa8, 0x51, 0xa3, 0x46
.byte 0x8c, 0x18, 0x30, 0x60, 0xc1, 0x83, 0x07, 0x0e
.byte 0x1d, 0x3a, 0x75, 0xea, 0xd5, 0xaa, 0x55, 0xab
.byte 0x57, 0xaf, 0x5f, 0xbe, 0x7c, 0xf9, 0xf2, 0xe5
.byte 0xca, 0x94, 0x28, 0x50, 0xa1, 0x42, 0x84, 0x09
.byte 0x13, 0x27, 0x4f, 0x9f, 0x3f, 0x7f, 0xff, 0xfe
.byte 0xfc, 0xf8, 0xf0, 0xe1, 0xc2, 0x85, 0x0b, 0x17
.byte 0x2f, 0x5e, 0xbc, 0x78, 0xf1, 0xe3, 0xc6, 0x8d
.byte 0x1a, 0x34, 0x68, 0xd0, 0xa0, 0x40, 0x80, 0x00
#elif defined(CRYPTO_AEAD)
RC_LFSR7:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03
.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a
.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c
.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b
.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a
.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f
.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43
.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09
.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36
.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37
.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31
.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25
.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c
.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a
.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f
.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00
#else
RC_LFSR8:
.byte 0x01, 0x02, 0x04, 0x08, 0x11, 0x23, 0x47, 0x8e
.byte 0x1c, 0x38, 0x71, 0xe2, 0xc4, 0x89, 0x12, 0x25
.byte 0x4b, 0x97, 0x2e, 0x5c, 0xb8, 0x70, 0xe0, 0xc0
.byte 0x81, 0x03, 0x06, 0x0c, 0x19, 0x32, 0x64, 0xc9
.byte 0x92, 0x24, 0x49, 0x93, 0x26, 0x4d, 0x9b, 0x37
.byte 0x6e, 0xdc, 0xb9, 0x72, 0xe4, 0xc8, 0x90, 0x20
.byte 0x41, 0x82, 0x05, 0x0a, 0x15, 0x2b, 0x56, 0xad
.byte 0x5b, 0xb6, 0x6d, 0xda, 0xb5, 0x6b, 0xd6, 0xac
.byte 0x59, 0xb2, 0x65, 0xcb, 0x96, 0x2c, 0x58, 0xb0
.byte 0x61, 0xc3, 0x87, 0x0f, 0x1f, 0x3e, 0x7d, 0xfb
.byte 0xf6, 0xed, 0xdb, 0xb7, 0x6f, 0xde, 0xbd, 0x7a
.byte 0xf5, 0xeb, 0xd7, 0xae, 0x5d, 0xba, 0x74, 0xe8
.byte 0xd1, 0xa2, 0x44, 0x88, 0x10, 0x21, 0x43, 0x86
.byte 0x0d, 0x1b, 0x36, 0x6c, 0xd8, 0xb1, 0x63, 0xc7
.byte 0x8f, 0x1e, 0x3c, 0x79, 0xf3, 0xe7, 0xce, 0x9c
.byte 0x39, 0x73, 0xe6, 0xcc, 0x98, 0x31, 0x62, 0xc5
.byte 0x8b, 0x16, 0x2d, 0x5a, 0xb4, 0x69, 0xd2, 0xa4
.byte 0x48, 0x91, 0x22, 0x45, 0x8a, 0x14, 0x29, 0x52
.byte 0xa5, 0x4a, 0x95, 0x2a, 0x54, 0xa9, 0x53, 0xa7
.byte 0x4e, 0x9d, 0x3b, 0x77, 0xee, 0xdd, 0xbb, 0x76
.byte 0xec, 0xd9, 0xb3, 0x67, 0xcf, 0x9e, 0x3d, 0x7b
.byte 0xf7, 0xef, 0xdf, 0xbf, 0x7e, 0xfd, 0xfa, 0xf4
.byte 0xe9, 0xd3, 0xa6, 0x4c, 0x99, 0x33, 0x66, 0xcd
.byte 0x9a, 0x35, 0x6a, 0xd4, 0xa8, 0x51, 0xa3, 0x46
.byte 0x8c, 0x18, 0x30, 0x60, 0xc1, 0x83, 0x07, 0x0e
.byte 0x1d, 0x3a, 0x75, 0xea, 0xd5, 0xaa, 0x55, 0xab
.byte 0x57, 0xaf, 0x5f, 0xbe, 0x7c, 0xf9, 0xf2, 0xe5
.byte 0xca, 0x94, 0x28, 0x50, 0xa1, 0x42, 0x84, 0x09
.byte 0x13, 0x27, 0x4f, 0x9f, 0x3f, 0x7f, 0xff, 0xfe
.byte 0xfc, 0xf8, 0xf0, 0xe1, 0xc2, 0x85, 0x0b, 0x17
.byte 0x2f, 0x5e, 0xbc, 0x78, 0xf1, 0xe3, 0xc6, 0x8d
.byte 0x1a, 0x34, 0x68, 0xd0, 0xa0, 0x40, 0x80, 0x00
#endif
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
;
; ============================================
; R E G I S T E R D E F I N I T I O N S
; ============================================
;
#define mclen r16
#define radlen r17
#define tcnt r17
#define tmp0 r20
#define tmp1 r21
#define cnt0 r22
#define rn r23
#define rate r24
; AEDH = 0b000: for authenticate AD
; AEDH = 0b001: for encryption
; AEDH = 0b011: for decryption
; AEDH = 0b100: for hash
#define AEDH r25
#define rcnt r26
#if (STATE_INBITS==256)
#include "knot256.h"
#elif (STATE_INBITS==384)
#include "knot384.h"
#elif (STATE_INBITS==512)
#include "knot512.h"
#else
#error "Not specified key size and state size"
#endif
#define CRYPTO_BYTES 32
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
.macro PUSH_CONFLICT
push r16
push r17
push r18
push r19
push r23
push r24
push r25
push r26
push r27
push r28
push r29
push r30
push r31
.endm
.macro POP_CONFLICT
pop r31
pop r30
pop r29
pop r28
pop r27
pop r26
pop r25
pop r24
pop r23
pop r19
pop r18
pop r17
pop r16
.endm
.macro PUSH_ALL
push r2
push r3
push r4
push r5
push r6
push r7
push r8
push r9
push r10
push r11
push r12
push r13
push r14
push r15
push r16
push r17
push r28
push r29
.endm
.macro POP_ALL
pop r29
pop r28
pop r17
pop r16
pop r15
pop r14
pop r13
pop r12
pop r11
pop r10
pop r9
pop r8
pop r7
pop r6
pop r5
pop r4
pop r3
pop r2
clr r1
.endm
\ No newline at end of file
#ifndef __CONFIG_H__
#define __CONFIG_H__
//#define CRYPTO_AEAD
#define CRYPTO_HASH
#define MAX_MESSAGE_LENGTH 128
#define STATE_INBITS 256
/* For CRYPTO_AEAD */
#define CRYPTO_KEYBITS 128
/* For CRYPTO_HASH */
#define CRYPTO_BITS 256
#define STATE_INBYTES ((STATE_INBITS + 7) / 8)
#define ROW_INBITS ((STATE_INBITS + 3) / 4)
#define ROW_INBYTES ((ROW_INBITS + 7) / 8)
/* For CRYPTO_AEAD */
#define CRYPTO_KEYBYTES ((CRYPTO_KEYBITS + 7) / 8)
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES CRYPTO_KEYBYTES
#define CRYPTO_ABYTES CRYPTO_KEYBYTES
#define CRYPTO_NOOVERLAP 1
#define MAX_ASSOCIATED_DATA_LENGTH 32
#define MAX_CIPHER_LENGTH (MAX_MESSAGE_LENGTH + CRYPTO_ABYTES)
#define TAG_MATCH 0
#define TAG_UNMATCH -1
#define OTHER_FAILURES -2
/* For CRYPTO_HASH */
#define CRYPTO_BYTES ((CRYPTO_BITS + 7) / 8)
#define DOMAIN_BITS 0x80
#define PAD_BITS 0x01
#define S384_R192_BITS 0x80
#if (STATE_INBITS==256)
#define C1 1
#define C2 8
#define C3 25
#elif (STATE_INBITS==384)
#define C1 1
#define C2 8
#define C3 55
#elif (STATE_INBITS==512)
#define C1 1
#define C2 16
#define C3 25
#else
#error "Not specified state size"
#endif
#ifdef CRYPTO_AEAD
/* For CRYPTO_AEAD */
#define KEY_INBITS (CRYPTO_KEYBYTES * 8)
#define KEY_INBYTES (CRYPTO_KEYBYTES)
#define NONCE_INBITS (CRYPTO_NPUBBYTES * 8)
#define NONCE_INBYTES (CRYPTO_NPUBBYTES)
#define TAG_INBITS (CRYPTO_ABYTES * 8)
#define TAG_INBYTES (CRYPTO_ABYTES)
#if (KEY_INBITS==128) && (STATE_INBITS==256)
#define RATE_INBITS 64
#define NR_0 52
#define NR_i 28
#define NR_f 32
#elif (KEY_INBITS==128) && (STATE_INBITS==384)
#define RATE_INBITS 192
#define NR_0 76
#define NR_i 28
#define NR_f 32
#elif (KEY_INBITS==192) && (STATE_INBITS==384)
#define RATE_INBITS 96
#define NR_0 76
#define NR_i 40
#define NR_f 44
#elif (KEY_INBITS==256) && (STATE_INBITS==512)
#define RATE_INBITS 128
#define NR_0 100
#define NR_i 52
#define NR_f 56
#else
#error "Not specified key size and state size"
#endif
#define RATE_INBYTES ((RATE_INBITS + 7) / 8)
#define SQUEEZE_RATE_INBYTES TAG_INBYTES
#endif
#ifdef CRYPTO_HASH
/* For CRYPTO_HASH */
#define HASH_DIGEST_INBITS (CRYPTO_BYTES * 8)
#if (HASH_DIGEST_INBITS==256) && (STATE_INBITS==256)
#define HASH_RATE_INBITS 32
#define HASH_SQUEEZE_RATE_INBITS 128
#define NR_h 68
#elif (HASH_DIGEST_INBITS==256) && (STATE_INBITS==384)
#define HASH_RATE_INBITS 128
#define HASH_SQUEEZE_RATE_INBITS 128
#define NR_h 80
#elif (HASH_DIGEST_INBITS==384) && (STATE_INBITS==384)
#define HASH_RATE_INBITS 48
#define HASH_SQUEEZE_RATE_INBITS 192
#define NR_h 104
#elif (HASH_DIGEST_INBITS==512) && (STATE_INBITS==512)
#define HASH_RATE_INBITS 64
#define HASH_SQUEEZE_RATE_INBITS 256
#define NR_h 140
#else
#error "Not specified hash digest size and state size"
#endif
#define HASH_RATE_INBYTES ((HASH_RATE_INBITS + 7) / 8)
#define HASH_SQUEEZE_RATE_INBYTES ((HASH_SQUEEZE_RATE_INBITS + 7) / 8)
#endif
#define TAG_MATCH 0
#define TAG_UNMATCH -1
#define OTHER_FAILURES -2
#endif
\ No newline at end of file
#ifdef __cplusplus
extern "C" {
#endif
int crypto_hash(
unsigned char *out,
const unsigned char *in,
unsigned long long inlen
);
#ifdef __cplusplus
}
#endif
\ No newline at end of file
#include <avr/io.h>
#include <avr/sfr_defs.h>
#include <stdlib.h>
#include <string.h>
#include "config.h"
extern void crypto_aead_encrypt_asm(
unsigned char *c,
const unsigned char *m,
unsigned char mlen,
const unsigned char *ad,
unsigned char adlen,
const unsigned char *npub,
const unsigned char *k
);
extern int crypto_aead_decrypt_asm(
unsigned char *m,
const unsigned char *c,
unsigned char clen,
const unsigned char *ad,
unsigned char adlen,
const unsigned char *npub,
const unsigned char *k
);
extern void crypto_hash_asm(
unsigned char *out,
const unsigned char *in,
unsigned char inlen
);
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
)
{
/*
...
... the code for the cipher implementation goes here,
... generating a ciphertext c[0],c[1],...,c[*clen-1]
... from a plaintext m[0],m[1],...,m[mlen-1]
... and associated data ad[0],ad[1],...,ad[adlen-1]
... and nonce npub[0],npub[1],..
... and secret key k[0],k[1],...
... the implementation shall not use nsec
...
... return 0;
*/
(void)nsec;
crypto_aead_encrypt_asm(c, m, mlen, ad, adlen, npub, k);
*clen = mlen + TAG_INBYTES;
return 0;
}
int crypto_aead_decrypt(
unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
)
{
/*
...
... the code for the AEAD implementation goes here,
... generating a plaintext m[0],m[1],...,m[*mlen-1]
... and secret message number nsec[0],nsec[1],...
... from a ciphertext c[0],c[1],...,c[clen-1]
... and associated data ad[0],ad[1],...,ad[adlen-1]
... and nonce number npub[0],npub[1],...
... and secret key k[0],k[1],...
...
... return 0;
*/
unsigned long long mlen_;
unsigned char tag_is_match;
(void)nsec;
if (clen < CRYPTO_ABYTES) {
return -1;
}
mlen_ = clen - CRYPTO_ABYTES;
tag_is_match = crypto_aead_decrypt_asm(m, c, mlen_, ad, adlen, npub, k);
if (tag_is_match != 0)
{
memset(m, 0, (size_t)mlen_);
return -1;
}
*mlen = mlen_;
return 0;
}
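/*
 * Hedged usage sketch, not part of the reference code: a round-trip through the
 * two wrappers above. The KNOT_AEAD_EXAMPLE guard, the helper name and the buffer
 * contents are assumptions for illustration only; sizes come from config.h.
 */
#ifdef KNOT_AEAD_EXAMPLE
static int knot_aead_roundtrip_example(void)
{
    unsigned char k[CRYPTO_KEYBYTES]     = {0};
    unsigned char npub[CRYPTO_NPUBBYTES] = {0};
    unsigned char ad[4] = {0, 1, 2, 3};
    unsigned char m[8]  = {'k', 'n', 'o', 't', 't', 'e', 's', 't'};
    unsigned char c[8 + CRYPTO_ABYTES];
    unsigned char m2[8];
    unsigned long long clen = 0, m2len = 0;

    /* ciphertext is message || tag, so clen becomes mlen + CRYPTO_ABYTES */
    crypto_aead_encrypt(c, &clen, m, sizeof m, ad, sizeof ad, NULL, npub, k);
    /* returns 0 and recovers the plaintext only if the tag verifies */
    return crypto_aead_decrypt(m2, &m2len, NULL, c, clen, ad, sizeof ad, npub, k);
}
#endif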
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
;
; ============================================
; S R A M D E F I N I T I O N S
; ============================================
;
#include <avr/io.h>
#include "config.h"
.section .noinit
SRAM_STATE: .BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
#if (STATE_INBYTES > 32)
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
#endif
#if (STATE_INBYTES > 48)
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
#endif
SRAM_MESSAGE_OUT_ADDR: .BYTE 0, 0
SRAM_MESSAGE_IN_ADDR: .BYTE 0, 0
SRAM_MESSAGE_IN_LEN: .BYTE 0, 0
#ifdef CRYPTO_AEAD
; For CRYPTO_AEAD
SRAM_ASSOCIATED_DATA_ADDR: .BYTE 0, 0
SRAM_ADLEN: .BYTE 0, 0
SRAM_NONCE_ADDR: .BYTE 0, 0
SRAM_KEY_ADDR: .BYTE 0, 0
SRAM_ADDITIONAL:
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
#if (CRYPTO_ABYTES > 16)
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
#endif
#if (CRYPTO_ABYTES > 24)
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
#endif
#endif
.section .text
#include "permutation.h"
; require YH:YL to be the address of the current message/ciphertext block
; for enc and dec, the resulting ciphertext/plaintext is stored via Z
; require ZH:ZL to be the address of the current output (ciphertext/plaintext) block
.macro XOR_to_State_ENCDEC
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
mov cnt0, rate
XOR_to_State_loop_ENCDEC:
ld tmp0, Y+ ; plaintext/ciphertext
ld tmp1, X ; state
eor tmp1, tmp0 ; ciphertext/plaintext
st Z+, tmp1 ; store ciphertext/plaintext
sbrc AEDH, 1 ; test auth/enc or dec, if AEDH[1] == 0, skip replacing the state byte
mov tmp1, tmp0 ; if dec, replace state
st X+, tmp1 ; store state byte
dec cnt0
brne XOR_to_State_loop_ENCDEC
; YH:YL are now the address of the next message/ciphertext block
.endm
; require YH:YL to be the address of the current associated data block
; only the state is updated here; nothing is written through Z
.macro XOR_to_State_AUTH
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
mov cnt0, rate
XOR_to_State_loop_AUTH:
ld tmp0, Y+ ; plaintext/ciphertext
ld tmp1, X ; state
eor tmp1, tmp0 ; ciphertext/plaintext
st X+, tmp1 ; store state byte
dec cnt0
brne XOR_to_State_loop_AUTH
; YH:YL are now the address of the next associated data block
.endm
; require YH:YL to point to the input data
; require ZH:ZL to point to the output data
; require cnt0 to contain the number of bytes in the source data
; require the number of bytes in the source data to be less than rate, i.e., 0 <= cnt0 < rate
;
; the 0th bit in AEDH is used to distinguish (auth AD) from (enc/dec M/C):
; AEDH[0] = 0 for (auth AD), AEDH[0] = 1 for (enc/dec M/C)
; the 1st bit in AEDH is used to distinguish (auth AD/enc M) from (dec C):
; AEDH[1] = 0 for (auth AD/enc M), AEDH[1] = 1 for (dec C)
; AEDH = 0b000 for (auth AD)
; AEDH = 0b001 for (enc M)
; AEDH = 0b011 for (dec C)
Pad_XOR_to_State:
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
tst cnt0
breq XOR_padded_data
XOR_source_data_loop:
ld tmp0, Y+ ; plaintext/ciphertext
ld tmp1, X ; state
eor tmp1, tmp0 ; ciphertext/plaintext
sbrc AEDH, 0 ; test auth or enc/dec, if AEDH[0] == 0, skip storing the result
st Z+, tmp1 ; store ciphertext/plaintext
sbrc AEDH, 1 ; test auth/enc or dec, if AEDH[1] == 0, skip replacing the state byte
mov tmp1, tmp0 ; if dec, replace state
st X+, tmp1 ; store state byte
dec cnt0
brne XOR_source_data_loop
XOR_padded_data:
ldi tmp0, PAD_BITS
ld tmp1, X
eor tmp1, tmp0
st X, tmp1
ret
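; The three AEDH-controlled stores above boil down to one per-byte rule. A hedged
; C model of that rule (assumes <stdint.h>; the function and buffer names are
; illustrative, this is a sketch and not the ASM itself):
;
;   static void pad_xor_to_state(uint8_t *state, const uint8_t *in, uint8_t *out,
;                                unsigned cnt0, uint8_t aedh)
;   {
;       for (unsigned i = 0; i < cnt0; i++) {
;           uint8_t t = state[i] ^ in[i];          /* keystream XOR: pt <-> ct          */
;           if (aedh & 0x01) *out++ = t;           /* enc/dec: emit ct (enc) / pt (dec) */
;           state[i] = (aedh & 0x02) ? in[i] : t;  /* dec keeps the ct, auth/enc keep t */
;       }
;       state[cnt0] ^= 0x01;                       /* PAD_BITS: single-byte padding     */
;   }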
AddDomain:
ldi XH, hi8(SRAM_STATE + STATE_INBYTES - 1)
ldi XL, lo8(SRAM_STATE + STATE_INBYTES - 1)
ldi tmp0, DOMAIN_BITS
ld tmp1, X
eor tmp0, tmp1
st X, tmp0
ret
; require ZH:ZL to be the address of the destination
EXTRACT_from_State:
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
mov tmp1, rate
EXTRACT_from_State_loop:
ld tmp0, X+
st Z+, tmp0
dec tmp1
brne EXTRACT_from_State_loop
ret
AUTH:
tst radlen
breq AUTH_end
cp radlen, rate
brlo auth_ad_padded_block
auth_ad_loop:
XOR_to_State_AUTH
rcall Permutation
sub radlen, rate
cp radlen, rate
brlo auth_ad_padded_block
rjmp auth_ad_loop
auth_ad_padded_block:
mov cnt0, radlen
rcall Pad_XOR_to_State
rcall Permutation
AUTH_end:
ret
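; Hedged C outline of AUTH above (illustrative names: permutation() stands for
; rcall Permutation with the caller-supplied round count rn, which is NR_i in the
; AEAD wrappers, and pad_xor_to_state is the byte-step sketch above):
;
;   if (adlen == 0) return;                      /* tst radlen ; breq AUTH_end   */
;   while (adlen >= rate) {                      /* full associated-data blocks  */
;       for (unsigned i = 0; i < rate; i++) state[i] ^= ad[i];
;       permutation(state, rn);
;       ad += rate; adlen -= rate;
;   }
;   pad_xor_to_state(state, ad, NULL, adlen, 0x00);  /* final (possibly empty) block */
;   permutation(state, rn);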
#ifdef CRYPTO_AEAD
Initialization:
ldi rn, NR_0
ldi XL, lo8(SRAM_STATE)
ldi XH, hi8(SRAM_STATE)
lds YH, SRAM_NONCE_ADDR
lds YL, SRAM_NONCE_ADDR + 1
ldi cnt0, CRYPTO_NPUBBYTES
load_nonce_loop:
ld tmp0, Y+
st X+, tmp0
dec cnt0
brne load_nonce_loop
lds YH, SRAM_KEY_ADDR
lds YL, SRAM_KEY_ADDR + 1
ldi cnt0, CRYPTO_KEYBYTES
load_key_loop:
ld tmp0, Y+
st X+, tmp0
dec cnt0
brne load_key_loop
#if (STATE_INBITS==384) && (RATE_INBITS==192)
ldi cnt0, (STATE_INBYTES - CRYPTO_NPUBBYTES - CRYPTO_KEYBYTES - 1)
clr tmp0
empty_state_loop:
st X+, tmp0
dec cnt0
brne empty_state_loop
ldi tmp0, S384_R192_BITS
st X+, tmp0
#endif
rcall Permutation
ret
ENC:
tst mclen
breq ENC_end
cp mclen, rate
brlo enc_padded_block
enc_loop:
XOR_to_State_ENCDEC
ldi rn, NR_i
rcall Permutation
sub mclen, rate
cp mclen, rate
brlo enc_padded_block
rjmp enc_loop
enc_padded_block:
mov cnt0, mclen
rcall Pad_XOR_to_State
ENC_end:
ret
Finalization:
ldi rate, SQUEEZE_RATE_INBYTES
ldi rn, NR_f
rcall Permutation
rcall EXTRACT_from_State
ret
; void crypto_aead_encrypt_asm(
; unsigned char *c,
; const unsigned char *m,
; unsigned long long mlen,
; const unsigned char *ad,
; unsigned long long adlen,
; const unsigned char *npub,
; const unsigned char *k
; )
;
; unsigned char *c, is passed in r24:r25
; const unsigned char *m, is passed in r22:r23
; unsigned long long mlen, is passed in r20:r21, only LSB (r20) is used
; const unsigned char *ad, is passed in r18:r19
; unsigned long long adlen, is passed in r16:r17, only LSB (r16) is used
; const unsigned char *npub, is passed in r14:r15
; const unsigned char *k is passed in r12:r13
.global crypto_aead_encrypt_asm
crypto_aead_encrypt_asm:
PUSH_ALL
ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
st X+, r25 ;store cipher address in SRAM_MESSAGE_OUT_ADDR
st X+, r24
st X+, r23 ;store message address in SRAM_MESSAGE_IN_ADDR
st X+, r22
st X+, r21 ;store message length in SRAM_MESSAGE_IN_LEN
st X+, r20
st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR
st X+, r18
st X+, r17 ;store associated data length in SRAM_ADLEN
st X+, r16
st X+, r15 ;store nonce address in SRAM_NONCE_ADDR
st X+, r14
st X+, r13 ;store key address in SRAM_KEY_ADDR
st X+, r12
mov radlen, r16
mov mclen, r20
rcall Initialization
ldi rn, NR_i
ldi rate, RATE_INBYTES
ldi AEDH, 0b000 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
lds YH, SRAM_ASSOCIATED_DATA_ADDR
lds YL, SRAM_ASSOCIATED_DATA_ADDR + 1
rcall AUTH
rcall AddDomain
ldi AEDH, 0b001 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
lds YH, SRAM_MESSAGE_IN_ADDR
lds YL, SRAM_MESSAGE_IN_ADDR + 1
lds ZH, SRAM_MESSAGE_OUT_ADDR
lds ZL, SRAM_MESSAGE_OUT_ADDR + 1
rcall ENC
rcall Finalization
POP_ALL
ret
; int crypto_aead_decrypt_asm(
; unsigned char *m,
; const unsigned char *c,
; unsigned long long clen,
; const unsigned char *ad,
; unsigned long long adlen,
; const unsigned char *npub,
; const unsigned char *k
; )
;
; unsigned char *m, is passed in r24:r25
; const unsigned char *c, is passed in r22:r23
; unsigned long long clen, is passed in r20:r21, only LSB (r20) is used
; const unsigned char *ad, is passed in r18:r19
; unsigned long long adlen, is passed in r16:r17, only LSB (r16) is used
; const unsigned char *npub, is passed in r14:r15
; const unsigned char *k is passed in r12:r13
.global crypto_aead_decrypt_asm
crypto_aead_decrypt_asm:
PUSH_ALL
ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
st X+, r25 ;store message address in SRAM_MESSAGE_OUT_ADDR
st X+, r24
st X+, r23 ;store cipher address in SRAM_MESSAGE_IN_ADDR
st X+, r22
st X+, r21 ;store cipher length in SRAM_MESSAGE_IN_LEN
st X+, r20
st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR
st X+, r18
st X+, r17 ;store associated data length in SRAM_ADLEN
st X+, r16
st X+, r15 ;store nonce address in SRAM_NONCE_ADDR
st X+, r14
st X+, r13 ;store key address in SRAM_KEY_ADDR
st X+, r12
mov radlen, r16
mov mclen, r20
rcall Initialization
ldi rn, NR_i
ldi rate, RATE_INBYTES
ldi AEDH, 0b000 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
lds YH, SRAM_ASSOCIATED_DATA_ADDR
lds YL, SRAM_ASSOCIATED_DATA_ADDR + 1
rcall AUTH
rcall AddDomain
ldi AEDH, 0b011 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
lds YH, SRAM_MESSAGE_IN_ADDR
lds YL, SRAM_MESSAGE_IN_ADDR + 1
lds ZH, SRAM_MESSAGE_OUT_ADDR
lds ZL, SRAM_MESSAGE_OUT_ADDR + 1
rcall ENC
ldi ZH, hi8(SRAM_ADDITIONAL)
ldi ZL, lo8(SRAM_ADDITIONAL)
rcall Finalization
sbiw ZL, CRYPTO_ABYTES
ldi cnt0, CRYPTO_ABYTES
compare_tag:
ld tmp0, Z+
ld tmp1, Y+
cp tmp0, tmp1
brne return_tag_not_match
dec cnt0
brne compare_tag
rjmp return_tag_match
return_tag_not_match:
ldi r25, 0xFF
ldi r24, 0xFF
rjmp crypto_aead_decrypt_end
return_tag_match:
clr r25
clr r24
crypto_aead_decrypt_end:
POP_ALL
ret
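; Hedged C equivalent of the compare_tag loop above (byte-wise, early-exit; the
; function name is illustrative). The recomputed tag is read through Z, the
; received tag through Y:
;
;   static int compare_tag(const unsigned char *computed, const unsigned char *received)
;   {
;       for (unsigned i = 0; i < CRYPTO_ABYTES; i++)
;           if (computed[i] != received[i]) return -1;   /* r25:r24 <- 0xFFFF */
;       return 0;                                        /* r25:r24 <- 0x0000 */
;   }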
; #ifdef CRYPTO_AEAD
#endif
#ifdef CRYPTO_HASH
; void crypto_hash_asm(
; unsigned char *out,
; const unsigned char *in,
; unsigned long long inlen
; )
;
; unsigned char *out, is passed in r24:r25
; const unsigned char *in, is passed in r22:r23
; unsigned long long inlen, is passed in r20:r21, only LSB (r20) is used
.global crypto_hash_asm
crypto_hash_asm:
PUSH_ALL
ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
st X+, r25 ;store digest output address in SRAM_MESSAGE_OUT_ADDR
st X+, r24
st X+, r23 ;store message address in SRAM_MESSAGE_IN_ADDR
st X+, r22
st X+, r21 ;store message length in SRAM_MESSAGE_IN_LEN
st X+, r20
mov mclen, r20
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
#if (STATE_INBITS==384) && (HASH_RATE_INBITS==128)
ldi cnt0, STATE_INBYTES - 1
#else
ldi cnt0, STATE_INBYTES
#endif
clr tmp0
zero_state:
st X+, tmp0
dec cnt0
brne zero_state
#if (STATE_INBITS==384) && (HASH_RATE_INBITS==128)
ldi tmp0, S384_R192_BITS
st X+, tmp0
#endif
ldi rn, NR_h
ldi AEDH, 0b100
HASH_ABSORBING:
mov radlen, mclen
tst radlen
breq EMPTY_M
ldi rate, HASH_RATE_INBYTES
lds YH, SRAM_MESSAGE_IN_ADDR
lds YL, SRAM_MESSAGE_IN_ADDR + 1
rcall AUTH
rjmp HASH_SQUEEZING
EMPTY_M:
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
ldi tmp0, PAD_BITS
ld tmp1, X
eor tmp1, tmp0
st X, tmp1
rcall Permutation
HASH_SQUEEZING:
ldi rate, HASH_SQUEEZE_RATE_INBYTES
lds ZH, SRAM_MESSAGE_OUT_ADDR
lds ZL, SRAM_MESSAGE_OUT_ADDR + 1
ldi tcnt, CRYPTO_BYTES
SQUEEZING_loop:
rcall EXTRACT_from_State
subi tcnt, HASH_SQUEEZE_RATE_INBYTES
breq HASH_SQUEEZING_end
rcall Permutation
rjmp SQUEEZING_loop
HASH_SQUEEZING_end:
POP_ALL
ret
#endif
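; Hedged C outline of crypto_hash_asm above (absorb() and permutation() are
; illustrative names; rates and NR_h come from config.h, and the STATE_INBITS==384
; / HASH_RATE_INBITS==128 build additionally seeds the last state byte with
; S384_R192_BITS before absorbing):
;
;   memset(state, 0, STATE_INBYTES);
;   absorb(state, in, inlen, HASH_RATE_INBYTES, NR_h);   /* AUTH: pad + permute last block */
;   for (unsigned done = 0; done < CRYPTO_BYTES; done += HASH_SQUEEZE_RATE_INBYTES) {
;       memcpy(out + done, state, HASH_SQUEEZE_RATE_INBYTES);   /* EXTRACT_from_State */
;       if (done + HASH_SQUEEZE_RATE_INBYTES < CRYPTO_BYTES)
;           permutation(state, NR_h);
;   }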
; Byte Order In AVR 8:
; KNOT-AEAD(128, 256, 64):
; N[ 0] AEAD_State[ 0] | Message[ 0] Perm_row_0[0] 0 Tag[ 0]
; N[ 1] AEAD_State[ 1] | Message[ 1] Perm_row_0[1] 0 Tag[ 1]
; N[ 2] AEAD_State[ 2] | Message[ 2] Perm_row_0[2] 0 Tag[ 2]
; N[ 3] AEAD_State[ 3] | Message[ 3] Perm_row_0[3] 0 Tag[ 3]
; N[ 4] AEAD_State[ 4] | Message[ 4] 0x01 Perm_row_0[4] 0 Tag[ 4]
; N[ 5] AEAD_State[ 5] | Message[ 5] 0x00 Perm_row_0[5] 0 Tag[ 5]
; N[ 6] AEAD_State[ 6] | Message[ 6] 0x00 Perm_row_0[6] 0 Tag[ 6]
; N[ 7] AEAD_State[ 7] | Message[ 7] 0x00 Perm_row_0[7] <<< 0 Tag[ 7]
; N[ 8] AEAD_State[ 8] | Perm_row_1[0] 1
; N[ 9] AEAD_State[ 9] | Perm_row_1[1] 1
; N[10] AEAD_State[10] | Perm_row_1[2] 1
; N[11] AEAD_State[11] | Perm_row_1[3] 1
; N[12] AEAD_State[12] | Perm_row_1[4] 1
; N[13] AEAD_State[13] | Perm_row_1[5] 1
; N[14] AEAD_State[14] | Perm_row_1[6] 1
; N[15] AEAD_State[15] | Perm_row_1[7] <<< 1
; K[ 0] AEAD_State[16] | Perm_row_2[0] 8
; K[ 1] AEAD_State[17] | Perm_row_2[1] 8
; K[ 2] AEAD_State[18] | Perm_row_2[2] 8
; K[ 3] AEAD_State[19] | Perm_row_2[3] 8
; K[ 4] AEAD_State[20] | Perm_row_2[4] 8
; K[ 5] AEAD_State[21] | Perm_row_2[5] 8
; K[ 6] AEAD_State[22] | Perm_row_2[6] 8
; K[ 7] AEAD_State[23] | Perm_row_2[7] <<< 8
; K[ 8] AEAD_State[24] | Perm_row_3[0] 25
; K[ 9] AEAD_State[25] | Perm_row_3[1] 25
; K[10] AEAD_State[26] | Perm_row_3[2] 25
; K[11] AEAD_State[27] | Perm_row_3[3] 25
; K[12] AEAD_State[28] | Perm_row_3[4] 25
; K[13] AEAD_State[29] | Perm_row_3[5] 25
; K[14] AEAD_State[30] | Perm_row_3[6] 25
; K[15] AEAD_State[31] | ^0x80 Perm_row_3[7] <<< 25
;
;
; KNOT-AEAD(128, 384, 192):
; Initialization
; N[ 0] AEAD_State[ 0] | Message[ 0] Perm_row_0[ 0] 0 Tag[ 0]
; N[ 1] AEAD_State[ 1] | Message[ 1] Perm_row_0[ 1] 0 Tag[ 1]
; N[ 2] AEAD_State[ 2] | Message[ 2] Perm_row_0[ 2] 0 Tag[ 2]
; N[ 3] AEAD_State[ 3] | Message[ 3] Perm_row_0[ 3] 0 Tag[ 3]
; N[ 4] AEAD_State[ 4] | Message[ 4] 0x01 Perm_row_0[ 4] 0 Tag[ 4]
; N[ 5] AEAD_State[ 5] | Message[ 5] 0x00 Perm_row_0[ 5] 0 Tag[ 5]
; N[ 6] AEAD_State[ 6] | Message[ 6] 0x00 Perm_row_0[ 6] 0 Tag[ 6]
; N[ 7] AEAD_State[ 7] | Message[ 7] 0x00 Perm_row_0[ 7] 0 Tag[ 7]
; N[ 8] AEAD_State[ 8] | Message[ 8] 0x00 Perm_row_0[ 8] 0 Tag[ 8]
; N[ 9] AEAD_State[ 9] | Message[ 9] 0x00 Perm_row_0[ 9] 0 Tag[ 9]
; N[10] AEAD_State[10] | Message[10] 0x00 Perm_row_0[10] 0 Tag[10]
; N[11] AEAD_State[11] | Message[11] 0x00 Perm_row_0[11] <<< 0 Tag[11]
; N[12] AEAD_State[12] | Message[12] 0x00 Perm_row_1[ 0] 1 Tag[12]
; N[13] AEAD_State[13] | Message[13] 0x00 Perm_row_1[ 1] 1 Tag[13]
; N[14] AEAD_State[14] | Message[14] 0x00 Perm_row_1[ 2] 1 Tag[14]
; N[15] AEAD_State[15] | Message[15] 0x00 Perm_row_1[ 3] 1 Tag[15]
; K[ 0] AEAD_State[16] | Message[16] 0x00 Perm_row_1[ 4] 1
; K[ 1] AEAD_State[17] | Message[17] 0x00 Perm_row_1[ 5] 1
; K[ 2] AEAD_State[18] | Message[18] 0x00 Perm_row_1[ 6] 1
; K[ 3] AEAD_State[19] | Message[19] 0x00 Perm_row_1[ 7] 1
; K[ 4] AEAD_State[20] | Message[20] 0x00 Perm_row_1[ 8] 1
; K[ 5] AEAD_State[21] | Message[21] 0x00 Perm_row_1[ 9] 1
; K[ 6] AEAD_State[22] | Message[22] 0x00 Perm_row_1[10] 1
; K[ 7] AEAD_State[23] | Message[23] 0x00 Perm_row_1[11] <<< 1
; K[ 8] AEAD_State[24] | Perm_row_2[ 0] 8
; K[ 9] AEAD_State[25] | Perm_row_2[ 1] 8
; K[10] AEAD_State[26] | Perm_row_2[ 2] 8
; K[11] AEAD_State[27] | Perm_row_2[ 3] 8
; K[12] AEAD_State[28] | Perm_row_2[ 4] 8
; K[13] AEAD_State[29] | Perm_row_2[ 5] 8
; K[14] AEAD_State[30] | Perm_row_2[ 6] 8
; K[15] AEAD_State[31] | Perm_row_2[ 7] 8
; 0x00 AEAD_State[32] | Perm_row_2[ 8] 8
; 0x00 AEAD_State[33] | Perm_row_2[ 9] 8
; 0x00 AEAD_State[34] | Perm_row_2[10] 8
; 0x00 AEAD_State[35] | Perm_row_2[11] <<< 8
; 0x00 AEAD_State[36] | Perm_row_3[ 0] 55
; 0x00 AEAD_State[37] | Perm_row_3[ 1] 55
; 0x00 AEAD_State[38] | Perm_row_3[ 2] 55
; 0x00 AEAD_State[39] | Perm_row_3[ 3] 55
; 0x00 AEAD_State[40] | Perm_row_3[ 4] 55
; 0x00 AEAD_State[41] | Perm_row_3[ 5] 55
; 0x00 AEAD_State[42] | Perm_row_3[ 6] 55
; 0x00 AEAD_State[43] | Perm_row_3[ 7] 55
; 0x00 AEAD_State[44] | Perm_row_3[ 8] 55
; 0x00 AEAD_State[45] | Perm_row_3[ 9] 55
; 0x00 AEAD_State[46] | Perm_row_3[10] 55
; 0x00 ^0x80 AEAD_State[47] | ^0x80 Perm_row_3[11] <<< 55
#include <avr/io.h>
#include <avr/sfr_defs.h>
#include <stdlib.h>
#include <string.h>
#include "api.h"
#include "crypto_hash.h"
extern void crypto_hash_asm(
unsigned char *out,
const unsigned char *in,
unsigned char inlen
);
int crypto_hash(
unsigned char *out,
const unsigned char *in,
unsigned long long inlen
)
{
/*
...
... the code for the hash function implementation goes here
... generating a hash value out[0],out[1],...,out[CRYPTO_BYTES-1]
... from a message in[0],in[1],...,in[inlen-1]
...
... return 0;
*/
crypto_hash_asm(out, in, inlen);
return 0;
}
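/*
 * Hedged usage sketch, not part of the reference code: hashing a short message
 * through the wrapper above. The KNOT_HASH_EXAMPLE guard and the helper name are
 * assumptions; CRYPTO_BYTES is taken from api.h.
 */
#ifdef KNOT_HASH_EXAMPLE
static void knot_hash_example(void)
{
    const unsigned char msg[3] = {'a', 'b', 'c'};
    unsigned char digest[CRYPTO_BYTES];
    crypto_hash(digest, msg, sizeof msg);   /* fills all CRYPTO_BYTES of digest */
}
#endif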
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
#define x10 r0
#define x11 r1
#define x12 r2
#define x13 r3
#define x14 r4
#define x15 r5
#define x16 r6
#define x17 r7
; an intentional arrangement of registers to facilitate movw
#define x20 r8
#define x21 r10
#define x22 r12
#define x23 r14
#define x24 r9
#define x25 r11
#define x26 r13
#define x27 r15
; an intentional arrangement of registers to facilitate movw
#define x30 r16
#define x35 r18
#define x32 r20
#define x37 r22
#define x34 r17
#define x31 r19
#define x36 r21
#define x33 r23
#define t0j r24
#define t1j r25
#define x0j r27
#include "assist.h"
.macro Sbox i0, i1, i2, i3
mov t0j, \i1
com \i0
and \i1, \i0
eor \i1, \i2
or \i2, t0j
eor \i0, \i3
eor \i2, \i0
eor t0j, \i3
and \i0, \i1
eor \i3, \i1
eor \i0, t0j
and t0j, \i2
eor \i1, t0j
.endm
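; Direct C transcription of the Sbox macro above, shown as a hedged sketch
; (assumes <stdint.h>; the function name is illustrative). Because the S-box is
; bit-sliced, the same instruction sequence works unchanged on wider words:
;
;   static void knot_sbox8(uint8_t *a, uint8_t *b, uint8_t *c, uint8_t *d)
;   {
;       uint8_t t = *b;        /* mov t0j, \i1 */
;       *a = (uint8_t)~*a;     /* com \i0      */
;       *b &= *a;              /* and \i1, \i0 */
;       *b ^= *c;              /* eor \i1, \i2 */
;       *c |= t;               /* or  \i2, t0j */
;       *a ^= *d;              /* eor \i0, \i3 */
;       *c ^= *a;              /* eor \i2, \i0 */
;       t  ^= *d;              /* eor t0j, \i3 */
;       *a &= *b;              /* and \i0, \i1 */
;       *d ^= *b;              /* eor \i3, \i1 */
;       *a ^= t;               /* eor \i0, t0j */
;       t  &= *c;              /* and t0j, \i2 */
;       *b ^= t;               /* eor \i1, t0j */
;   }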
Permutation:
PUSH_CONFLICT
mov rcnt, rn
ldi YH, hi8(SRAM_STATE + ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + ROW_INBYTES)
ld x10, Y+
ld x11, Y+
ld x12, Y+
ld x13, Y+
ld x14, Y+
ld x15, Y+
ld x16, Y+
ld x17, Y+
ld x20, Y+
ld x21, Y+
ld x22, Y+
ld x23, Y+
ld x24, Y+
ld x25, Y+
ld x26, Y+
ld x27, Y+
ld x30, Y+
ld x31, Y+
ld x32, Y+
ld x33, Y+
ld x34, Y+
ld x35, Y+
ld x36, Y+
ld x37, Y+
#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[2] = 1 for HASH
rjmp For_Hash
For_AEAD:
ldi ZL, lo8(RC_LFSR6)
ldi ZH, hi8(RC_LFSR6)
rjmp round_loop_start
For_Hash:
ldi ZL, lo8(RC_LFSR7)
ldi ZH, hi8(RC_LFSR7)
#elif defined(CRYPTO_AEAD)
ldi ZL, lo8(RC_LFSR6)
ldi ZH, hi8(RC_LFSR6)
#else
ldi ZL, lo8(RC_LFSR7)
ldi ZH, hi8(RC_LFSR7)
#endif
round_loop_start:
; AddRC
lpm t0j, Z+
ldi YH, hi8(SRAM_STATE)
ldi YL, lo8(SRAM_STATE)
ld x0j, Y
eor x0j, t0j
; SubColumns
Sbox x0j, x10, x20, x30
st Y+, x0j
ld x0j, Y
Sbox x0j, x11, x21, x31
st Y+, x0j
ld x0j, Y
Sbox x0j, x12, x22, x32
st Y+, x0j
ld x0j, Y
Sbox x0j, x13, x23, x33
st Y+, x0j
ld x0j, Y
Sbox x0j, x14, x24, x34
st Y+, x0j
ld x0j, Y
Sbox x0j, x15, x25, x35
st Y+, x0j
ld x0j, Y
Sbox x0j, x16, x26, x36
st Y+, x0j
ld x0j, Y
Sbox x0j, x17, x27, x37
st Y, x0j
; ShiftRows
; <<< 1
mov t0j, x17
rol t0j
rol x10
rol x11
rol x12
rol x13
rol x14
rol x15
rol x16
rol x17
; <<< 8
; 7 6 5 4 3 2 1 0 => 6 5 4 3 2 1 0 7
;mov t0j, x27
;mov x27, x26
;mov x26, x25
;mov x25, x24
;mov x24, x23
;mov x23, x22
;mov x22, x21
;mov x21, x20
;mov x20, t0j
; an intentional arrangement of registers to facilitate movw
movw t0j, x23 ; t1j:t0j <= x27:x23
movw x23, x22 ; x27:x23 <= x26:x22
movw x22, x21 ; x26:x22 <= x25:x21
movw x21, x20 ; x25:x21 <= x24:x20
mov x20, t1j ; x20 <= t1j
mov x24, t0j ; x24 <= t0j
; <<< 1
mov t0j, x37
rol t0j
rol x30
rol x31
rol x32
rol x33
rol x34
rol x35
rol x36
rol x37
; <<< 24
; 7 6 5 4 3 2 1 0 => 4 3 2 1 0 7 6 5
;mov t0j, x30
;mov x30, x35
;mov x35, x32
;mov x32, x37
;mov x37, x34
;mov x34, x31
;mov x31, x36
;mov x36, x33
;mov x33, t0j
; an intentional arrangement of registers to facilitate movw
;x30 r16
;x35 r18
;x32 r20
;x37 r22
;x34 r17
;x31 r19
;x36 r21
;x33 r23
movw t0j, x30 ; t1j:t0j <= x34:x30
movw x30, x35 ; x34:x30 <= x31:x35
movw x35, x32 ; x31:x35 <= x36:x32
movw x32, x37 ; x36:x32 <= x33:x37
mov x37, t1j ; x37 <= x34
mov x33, t0j ; x33 <= x30
dec rcnt
breq round_loop_end
jmp round_loop_start
round_loop_end:
ldi YH, hi8(SRAM_STATE + ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + ROW_INBYTES)
st Y+, x10
st Y+, x11
st Y+, x12
st Y+, x13
st Y+, x14
st Y+, x15
st Y+, x16
st Y+, x17
st Y+, x20
st Y+, x21
st Y+, x22
st Y+, x23
st Y+, x24
st Y+, x25
st Y+, x26
st Y+, x27
st Y+, x30
st Y+, x31
st Y+, x32
st Y+, x33
st Y+, x34
st Y+, x35
st Y+, x36
st Y+, x37
POP_CONFLICT
ret
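; Hedged C view of one round of this 256-bit permutation, with the state as four
; 64-bit little-endian rows. rotl64 and knot_sbox64 (a word-wide version of the
; knot_sbox8 sketch above) are illustrative names; the rotation amounts are
; C1/C2/C3 from config.h:
;
;   static inline uint64_t rotl64(uint64_t x, unsigned n) {
;       return (x << n) | (x >> (64u - n));      /* assumes 0 < n < 64 */
;   }
;   static void knot256_round(uint64_t a[4], uint8_t rc)
;   {
;       a[0] ^= rc;                              /* AddRC                            */
;       knot_sbox64(&a[0], &a[1], &a[2], &a[3]); /* SubColumns                       */
;       a[1] = rotl64(a[1], 1);                  /* ShiftRows: row 1 <<< 1           */
;       a[2] = rotl64(a[2], 8);                  /*            row 2 <<< 8           */
;       a[3] = rotl64(a[3], 25);                 /*            row 3 <<< 1 + <<< 24  */
;   }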
.section .text
#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
RC_LFSR6:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x21, 0x03, 0x06
.byte 0x0c, 0x18, 0x31, 0x22, 0x05, 0x0a, 0x14, 0x29
.byte 0x13, 0x27, 0x0f, 0x1e, 0x3d, 0x3a, 0x34, 0x28
.byte 0x11, 0x23, 0x07, 0x0e, 0x1c, 0x39, 0x32, 0x24
.byte 0x09, 0x12, 0x25, 0x0b, 0x16, 0x2d, 0x1b, 0x37
.byte 0x2e, 0x1d, 0x3b, 0x36, 0x2c, 0x19, 0x33, 0x26
.byte 0x0d, 0x1a, 0x35, 0x2a, 0x15, 0x2b, 0x17, 0x2f
.byte 0x1f, 0x3f, 0x3e, 0x3c, 0x38, 0x30, 0x20, 0x00
RC_LFSR7:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03
.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a
.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c
.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b
.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a
.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f
.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43
.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09
.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36
.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37
.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31
.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25
.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c
.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a
.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f
.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00
#elif defined(CRYPTO_AEAD)
RC_LFSR6:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x21, 0x03, 0x06
.byte 0x0c, 0x18, 0x31, 0x22, 0x05, 0x0a, 0x14, 0x29
.byte 0x13, 0x27, 0x0f, 0x1e, 0x3d, 0x3a, 0x34, 0x28
.byte 0x11, 0x23, 0x07, 0x0e, 0x1c, 0x39, 0x32, 0x24
.byte 0x09, 0x12, 0x25, 0x0b, 0x16, 0x2d, 0x1b, 0x37
.byte 0x2e, 0x1d, 0x3b, 0x36, 0x2c, 0x19, 0x33, 0x26
.byte 0x0d, 0x1a, 0x35, 0x2a, 0x15, 0x2b, 0x17, 0x2f
.byte 0x1f, 0x3f, 0x3e, 0x3c, 0x38, 0x30, 0x20, 0x00
#else
RC_LFSR7:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03
.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a
.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c
.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b
.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a
.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f
.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43
.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09
.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36
.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37
.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31
.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25
.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c
.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a
.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f
.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00
#endif
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
; an intentional arrangement of registers to facilitate movw
#define x20 r0
#define x21 r2
#define x22 r4
#define x23 r6
#define x24 r8
#define x25 r10
#define x26 r1
#define x27 r3
#define x28 r5
#define x29 r7
#define x2a r9
#define x2b r11
; an intentional arrangement of registers to facilitate movw
#define x30 r22
#define x35 r20
#define x3a r18
#define x33 r16
#define x38 r14
#define x31 r12
#define x36 r23
#define x3b r21
#define x34 r19
#define x39 r17
#define x32 r15
#define x37 r13
#define t0j r24
#define t1j r25
#define x0j r25
#define x1j r27
#include "assist.h"
.macro Sbox i0, i1, i2, i3
ldi t0j, 0xFF
eor \i0, t0j
mov t0j, \i1
and \i1, \i0
eor \i1, \i2
or \i2, t0j
eor \i0, \i3
eor \i2, \i0
eor t0j, \i3
and \i0, \i1
eor \i3, \i1
eor \i0, t0j
and t0j, \i2
eor \i1, t0j
.endm
.macro OneColumn i0, i1, i2, i3
ld \i0, Y
ldd \i1, Y + ROW_INBYTES
Sbox \i0, \i1, \i2, \i3
st Y+, \i0
rol \i1 ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES -1, \i1
.endm
Permutation:
PUSH_CONFLICT
mov rcnt, rn
ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES)
ld x20, Y+
ld x21, Y+
ld x22, Y+
ld x23, Y+
ld x24, Y+
ld x25, Y+
ld x26, Y+
ld x27, Y+
ld x28, Y+
ld x29, Y+
ld x2a, Y+
ld x2b, Y+
ld x30, Y+
ld x31, Y+
ld x32, Y+
ld x33, Y+
ld x34, Y+
ld x35, Y+
ld x36, Y+
ld x37, Y+
ld x38, Y+
ld x39, Y+
ld x3a, Y+
ld x3b, Y+
ldi ZL, lo8(RC_LFSR7)
ldi ZH, hi8(RC_LFSR7)
round_loop_start:
; AddRC
lpm t0j, Z+
ldi YH, hi8(SRAM_STATE)
ldi YL, lo8(SRAM_STATE)
ld x0j, Y
eor x0j, t0j
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, x20, x30
st Y+, x0j
lsl x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES -1, x1j
OneColumn x0j, x1j, x21, x31
OneColumn x0j, x1j, x22, x32
OneColumn x0j, x1j, x23, x33
OneColumn x0j, x1j, x24, x34
OneColumn x0j, x1j, x25, x35
OneColumn x0j, x1j, x26, x36
OneColumn x0j, x1j, x27, x37
OneColumn x0j, x1j, x28, x38
OneColumn x0j, x1j, x29, x39
OneColumn x0j, x1j, x2a, x3a
OneColumn x0j, x1j, x2b, x3b
ld x1j, Y
eor t0j, t0j
adc x1j, t0j
st Y, x1j
; b a 9 8 7 6 5 4 3 2 1 0
; -- -- -- -- -- -- -- -- -- -- -- x- 0
; -- -- -- -- -- -- -- -- -- -- -- x' 0
; -- -- -- -- -- -- -- -- -- -- x- -- 1
; -- -- -- -- x' -- -- -- -- -- -- -- 7
; 4 3 2 1 0 b a 9 8 7 6 5
; ShiftRows -- the last two rows
; <<< 8
; b a 9 8 7 6 5 4 3 2 1 0 => a 9 8 7 6 5 4 3 2 1 0 b
movw t0j, x25 ; t1j:t0j <= x2b:x25
movw x25, x24 ; x2b:x25 <= x2a:x24
movw x24, x23 ; x2a:x24 <= x29:x23
movw x23, x22 ; x29:x23 <= x28:x22
movw x22, x21 ; x28:x22 <= x27:x21
movw x21, x20 ; x27:x21 <= x26:x20
mov x26, t0j ; x26 <= x25
mov x20, t1j ; x20 <= x2b
; >>> 1
mov t0j, x3b
ror t0j
ror x3a
ror x39
ror x38
ror x37
ror x36
ror x35
ror x34
ror x33
ror x32
ror x31
ror x30
ror x3b
; <<< 56
; b a 9 8 7 6 5 4 3 2 1 0 => 4 3 2 1 0 b a 9 8 7 6 5
; mov x3j, x30
; mov x30, x35
; mov x35, x3a
; mov x3a, x33
; mov x33, x38
; mov x38, x31
; mov x31, x36
; mov x36, x3b
; mov x3b, x34
; mov x34, x39
; mov x39, x32
; mov x32, x37
; mov x37, x3j
; an intentional arrangement of registers to facilitate movw
; x30 r22
; x35 r20
; x3a r18
; x33 r16
; x38 r14
; x31 r12
; x36 r23
; x3b r21
; x34 r19
; x39 r17
; x32 r15
; x37 r13
movw t0j, x30 ; t1j:t0j <= x36:x30
movw x30, x35 ; x36:x30 <= x3b:x35
movw x35, x3a ; x3b:x35 <= x34:x3a
movw x3a, x33 ; x34:x3a <= x39:x33
movw x33, x38 ; x39:x33 <= x32:x38
movw x38, x31 ; x32:x38 <= x37:x31
mov x31, t1j ; x31 <= x36
mov x37, t0j ; x37 <= x30
dec rcnt
breq round_loop_end
jmp round_loop_start
round_loop_end:
ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES)
st Y+, x20
st Y+, x21
st Y+, x22
st Y+, x23
st Y+, x24
st Y+, x25
st Y+, x26
st Y+, x27
st Y+, x28
st Y+, x29
st Y+, x2a
st Y+, x2b
st Y+, x30
st Y+, x31
st Y+, x32
st Y+, x33
st Y+, x34
st Y+, x35
st Y+, x36
st Y+, x37
st Y+, x38
st Y+, x39
st Y+, x3a
st Y+, x3b
POP_CONFLICT
ret
RC_LFSR7:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03
.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a
.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c
.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b
.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a
.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f
.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43
.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09
.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36
.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37
.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31
.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25
.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c
.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a
.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f
.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
#define x20 r0
#define x22 r2
#define x24 r4
#define x26 r6
#define x28 r1
#define x2a r3
#define x2c r5
#define x2e r7
#define x30 r8
#define x3d r10
#define x3a r12
#define x37 r14
#define x34 r16
#define x31 r18
#define x3e r20
#define x3b r22
#define x38 r9
#define x35 r11
#define x32 r13
#define x3f r15
#define x3c r17
#define x39 r19
#define x36 r21
#define x33 r23
#define t0j r24
#define t1j r25
#define x0j r25
#define x1j r27
#define x2j r26
#include "assist.h"
.macro Sbox i0, i1, i2, i3
ldi t0j, 0xFF
eor \i0, t0j
mov t0j, \i1
and \i1, \i0
eor \i1, \i2
or \i2, t0j
eor \i0, \i3
eor \i2, \i0
eor t0j, \i3
and \i0, \i1
eor \i3, \i1
eor \i0, t0j
and t0j, \i2
eor \i1, t0j
.endm
.macro TwoColumns i2_e, i3_e, i3_o
; column 2i
ld x0j, Y
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, \i2_e, \i3_e
st Y+, x0j
rol x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES - 1, x1j
; column 2i+1
ld x0j, Y
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, x2j, \i3_o
st Y+, x0j
rol x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES - 1, x1j
ldd t0j, Y + 2 * ROW_INBYTES + 1
std Y + 2 * ROW_INBYTES + 1, x2j
mov x2j, t0j
.endm
Permutation:
PUSH_CONFLICT
mov rcnt, rn
push rcnt
ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES)
ldd x20, Y + 0x00
ldd x22, Y + 0x02
ldd x24, Y + 0x04
ldd x26, Y + 0x06
ldd x28, Y + 0x08
ldd x2a, Y + 0x0a
ldd x2c, Y + 0x0c
ldd x2e, Y + 0x0e
adiw YL, ROW_INBYTES
ld x30, Y+
ld x31, Y+
ld x32, Y+
ld x33, Y+
ld x34, Y+
ld x35, Y+
ld x36, Y+
ld x37, Y+
ld x38, Y+
ld x39, Y+
ld x3a, Y+
ld x3b, Y+
ld x3c, Y+
ld x3d, Y+
ld x3e, Y+
ld x3f, Y+
#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[2] = 1 for HASH
rjmp For_Hash
For_AEAD:
ldi ZL, lo8(RC_LFSR7)
ldi ZH, hi8(RC_LFSR7)
rjmp round_loop_start
For_Hash:
ldi ZL, lo8(RC_LFSR8)
ldi ZH, hi8(RC_LFSR8)
#elif defined(CRYPTO_AEAD)
ldi ZL, lo8(RC_LFSR7)
ldi ZH, hi8(RC_LFSR7)
#else
ldi ZL, lo8(RC_LFSR8)
ldi ZH, hi8(RC_LFSR8)
#endif
round_loop_start:
; AddRC
lpm t0j, Z+
ldi YH, hi8(SRAM_STATE)
ldi YL, lo8(SRAM_STATE)
; column 0
ld x0j, Y
eor x0j, t0j
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, x20, x30
st Y+, x0j
lsl x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES - 1, x1j
; column 1
ld x0j, Y
ldd x1j, Y + ROW_INBYTES
ldd x2j, Y + 2 * ROW_INBYTES
Sbox x0j, x1j, x2j, x31
st Y+, x0j
rol x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES - 1, x1j
ldd t0j, Y + 2 * ROW_INBYTES + 1
std Y + 2 * ROW_INBYTES + 1, x2j
mov x2j, t0j
; column 2, 3
TwoColumns x22, x32, x33
; column 4, 5
TwoColumns x24, x34, x35
; column 6, 7
TwoColumns x26, x36, x37
; column 8, 9
TwoColumns x28, x38, x39
; column 10, 11
TwoColumns x2a, x3a, x3b
; column 12, 13
TwoColumns x2c, x3c, x3d
; column 14
ld x0j, Y
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, x2e, x3e
st Y+, x0j
rol x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES - 1, x1j
; column 15
ld x0j, Y
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, x2j, x3f
st Y+, x0j
rol x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES - 1, x1j
ld x1j, Y
eor t0j, t0j
adc x1j, t0j
st Y, x1j
std Y + ROW_INBYTES + 1, x2j
; f e d c b a 9 8 7 6 5 4 3 2 1 0
; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x- 0
; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x' 0
; -- -- -- -- -- -- -- -- -- -- -- -- -- x- -- -- 2
; -- -- -- -- -- -- -- -- -- -- -- -- x' -- -- -- 3
; c b a 9 8 7 6 5 4 3 2 1 0 f e d
; x2e x2c x2a x28 x26 x24 x22 x20 => x2c x2a x28 x26 x24 x22 x20 x2e
;mov t0j, x2e
;mov x2e, x2c
;mov x2c, x2a
;mov x2a, x28
;mov x28, x26
;mov x26, x24
;mov x24, x22
;mov x22, x20
;mov x20, t0j
; an intentional arrangement of registers to facilitate movw
movw t0j, x26 ; t1j:t0j <= x2e:x26
movw x26, x24 ; x2e:x26 <= x2c:x24
movw x24, x22 ; x2c:x24 <= x2a:x22
movw x22, x20 ; x2a:x22 <= x28:x20
mov x20, t1j ; x20 <= t1j
mov x28, t0j ; x28 <= t0j
; <<< 1
mov t0j, x3f
rol t0j
rol x30
rol x31
rol x32
rol x33
rol x34
rol x35
rol x36
rol x37
rol x38
rol x39
rol x3a
rol x3b
rol x3c
rol x3d
rol x3e
rol x3f
; <<< 24
; f e d c b a 9 8 7 6 5 4 3 2 1 0 =>
; c b a 9 8 7 6 5 4 3 2 1 0 f e d
; mov x3j, x30
; mov x30, x3d
; mov x3d, x3a
; mov x3a, x37
; mov x37, x34
; mov x34, x31
; mov x31, x3e
; mov x3e, x3b
; mov x3b, x38
; mov x38, x35
; mov x35, x32
; mov x32, x3f
; mov x3f, x3c
; mov x3c, x39
; mov x39, x36
; mov x36, x33
; mov x33, x3j
; an intentional arrangement of registers to facilitate movw
; x30 r8
; x3d r10
; x3a r12
; x37 r14
; x34 r16
; x31 r18
; x3e r20
; x3b r22
; x38 r9
; x35 r11
; x32 r13
; x3f r15
; x3c r17
; x39 r19
; x36 r21
; x33 r23
movw t0j, x30 ; t1j:t0j <= x38:x30
movw x30, x3d ; x38:x30 <= x35:x3d
movw x3d, x3a ; x35:x3d <= x32:x3a
movw x3a, x37 ; x32:x3a <= x3f:x37
movw x37, x34 ; x3f:x37 <= x3c:x34
movw x34, x31 ; x3c:x34 <= x39:x31
movw x31, x3e ; x39:x31 <= x36:x3e
movw x3e, x3b ; x36:x3e <= x33:x3b
mov x3b, t1j ; x3b <= x38
mov x33, t0j ; x33 <= x30
pop rcnt
dec rcnt
push rcnt
breq round_loop_end
rjmp round_loop_start
round_loop_end:
pop rcnt
ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES)
std Y + 0x00, x20
std Y + 0x02, x22
std Y + 0x04, x24
std Y + 0x06, x26
std Y + 0x08, x28
std Y + 0x0a, x2a
std Y + 0x0c, x2c
std Y + 0x0e, x2e
adiw YL, ROW_INBYTES
st Y+, x30
st Y+, x31
st Y+, x32
st Y+, x33
st Y+, x34
st Y+, x35
st Y+, x36
st Y+, x37
st Y+, x38
st Y+, x39
st Y+, x3a
st Y+, x3b
st Y+, x3c
st Y+, x3d
st Y+, x3e
st Y+, x3f
POP_CONFLICT
ret
.section .text
#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
RC_LFSR7:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03
.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a
.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c
.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b
.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a
.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f
.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43
.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09
.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36
.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37
.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31
.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25
.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c
.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a
.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f
.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00
RC_LFSR8:
.byte 0x01, 0x02, 0x04, 0x08, 0x11, 0x23, 0x47, 0x8e
.byte 0x1c, 0x38, 0x71, 0xe2, 0xc4, 0x89, 0x12, 0x25
.byte 0x4b, 0x97, 0x2e, 0x5c, 0xb8, 0x70, 0xe0, 0xc0
.byte 0x81, 0x03, 0x06, 0x0c, 0x19, 0x32, 0x64, 0xc9
.byte 0x92, 0x24, 0x49, 0x93, 0x26, 0x4d, 0x9b, 0x37
.byte 0x6e, 0xdc, 0xb9, 0x72, 0xe4, 0xc8, 0x90, 0x20
.byte 0x41, 0x82, 0x05, 0x0a, 0x15, 0x2b, 0x56, 0xad
.byte 0x5b, 0xb6, 0x6d, 0xda, 0xb5, 0x6b, 0xd6, 0xac
.byte 0x59, 0xb2, 0x65, 0xcb, 0x96, 0x2c, 0x58, 0xb0
.byte 0x61, 0xc3, 0x87, 0x0f, 0x1f, 0x3e, 0x7d, 0xfb
.byte 0xf6, 0xed, 0xdb, 0xb7, 0x6f, 0xde, 0xbd, 0x7a
.byte 0xf5, 0xeb, 0xd7, 0xae, 0x5d, 0xba, 0x74, 0xe8
.byte 0xd1, 0xa2, 0x44, 0x88, 0x10, 0x21, 0x43, 0x86
.byte 0x0d, 0x1b, 0x36, 0x6c, 0xd8, 0xb1, 0x63, 0xc7
.byte 0x8f, 0x1e, 0x3c, 0x79, 0xf3, 0xe7, 0xce, 0x9c
.byte 0x39, 0x73, 0xe6, 0xcc, 0x98, 0x31, 0x62, 0xc5
.byte 0x8b, 0x16, 0x2d, 0x5a, 0xb4, 0x69, 0xd2, 0xa4
.byte 0x48, 0x91, 0x22, 0x45, 0x8a, 0x14, 0x29, 0x52
.byte 0xa5, 0x4a, 0x95, 0x2a, 0x54, 0xa9, 0x53, 0xa7
.byte 0x4e, 0x9d, 0x3b, 0x77, 0xee, 0xdd, 0xbb, 0x76
.byte 0xec, 0xd9, 0xb3, 0x67, 0xcf, 0x9e, 0x3d, 0x7b
.byte 0xf7, 0xef, 0xdf, 0xbf, 0x7e, 0xfd, 0xfa, 0xf4
.byte 0xe9, 0xd3, 0xa6, 0x4c, 0x99, 0x33, 0x66, 0xcd
.byte 0x9a, 0x35, 0x6a, 0xd4, 0xa8, 0x51, 0xa3, 0x46
.byte 0x8c, 0x18, 0x30, 0x60, 0xc1, 0x83, 0x07, 0x0e
.byte 0x1d, 0x3a, 0x75, 0xea, 0xd5, 0xaa, 0x55, 0xab
.byte 0x57, 0xaf, 0x5f, 0xbe, 0x7c, 0xf9, 0xf2, 0xe5
.byte 0xca, 0x94, 0x28, 0x50, 0xa1, 0x42, 0x84, 0x09
.byte 0x13, 0x27, 0x4f, 0x9f, 0x3f, 0x7f, 0xff, 0xfe
.byte 0xfc, 0xf8, 0xf0, 0xe1, 0xc2, 0x85, 0x0b, 0x17
.byte 0x2f, 0x5e, 0xbc, 0x78, 0xf1, 0xe3, 0xc6, 0x8d
.byte 0x1a, 0x34, 0x68, 0xd0, 0xa0, 0x40, 0x80, 0x00
#elif defined(CRYPTO_AEAD)
RC_LFSR7:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03
.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a
.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c
.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b
.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a
.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f
.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43
.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09
.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36
.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37
.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31
.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25
.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c
.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a
.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f
.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00
#else
RC_LFSR8:
.byte 0x01, 0x02, 0x04, 0x08, 0x11, 0x23, 0x47, 0x8e
.byte 0x1c, 0x38, 0x71, 0xe2, 0xc4, 0x89, 0x12, 0x25
.byte 0x4b, 0x97, 0x2e, 0x5c, 0xb8, 0x70, 0xe0, 0xc0
.byte 0x81, 0x03, 0x06, 0x0c, 0x19, 0x32, 0x64, 0xc9
.byte 0x92, 0x24, 0x49, 0x93, 0x26, 0x4d, 0x9b, 0x37
.byte 0x6e, 0xdc, 0xb9, 0x72, 0xe4, 0xc8, 0x90, 0x20
.byte 0x41, 0x82, 0x05, 0x0a, 0x15, 0x2b, 0x56, 0xad
.byte 0x5b, 0xb6, 0x6d, 0xda, 0xb5, 0x6b, 0xd6, 0xac
.byte 0x59, 0xb2, 0x65, 0xcb, 0x96, 0x2c, 0x58, 0xb0
.byte 0x61, 0xc3, 0x87, 0x0f, 0x1f, 0x3e, 0x7d, 0xfb
.byte 0xf6, 0xed, 0xdb, 0xb7, 0x6f, 0xde, 0xbd, 0x7a
.byte 0xf5, 0xeb, 0xd7, 0xae, 0x5d, 0xba, 0x74, 0xe8
.byte 0xd1, 0xa2, 0x44, 0x88, 0x10, 0x21, 0x43, 0x86
.byte 0x0d, 0x1b, 0x36, 0x6c, 0xd8, 0xb1, 0x63, 0xc7
.byte 0x8f, 0x1e, 0x3c, 0x79, 0xf3, 0xe7, 0xce, 0x9c
.byte 0x39, 0x73, 0xe6, 0xcc, 0x98, 0x31, 0x62, 0xc5
.byte 0x8b, 0x16, 0x2d, 0x5a, 0xb4, 0x69, 0xd2, 0xa4
.byte 0x48, 0x91, 0x22, 0x45, 0x8a, 0x14, 0x29, 0x52
.byte 0xa5, 0x4a, 0x95, 0x2a, 0x54, 0xa9, 0x53, 0xa7
.byte 0x4e, 0x9d, 0x3b, 0x77, 0xee, 0xdd, 0xbb, 0x76
.byte 0xec, 0xd9, 0xb3, 0x67, 0xcf, 0x9e, 0x3d, 0x7b
.byte 0xf7, 0xef, 0xdf, 0xbf, 0x7e, 0xfd, 0xfa, 0xf4
.byte 0xe9, 0xd3, 0xa6, 0x4c, 0x99, 0x33, 0x66, 0xcd
.byte 0x9a, 0x35, 0x6a, 0xd4, 0xa8, 0x51, 0xa3, 0x46
.byte 0x8c, 0x18, 0x30, 0x60, 0xc1, 0x83, 0x07, 0x0e
.byte 0x1d, 0x3a, 0x75, 0xea, 0xd5, 0xaa, 0x55, 0xab
.byte 0x57, 0xaf, 0x5f, 0xbe, 0x7c, 0xf9, 0xf2, 0xe5
.byte 0xca, 0x94, 0x28, 0x50, 0xa1, 0x42, 0x84, 0x09
.byte 0x13, 0x27, 0x4f, 0x9f, 0x3f, 0x7f, 0xff, 0xfe
.byte 0xfc, 0xf8, 0xf0, 0xe1, 0xc2, 0x85, 0x0b, 0x17
.byte 0x2f, 0x5e, 0xbc, 0x78, 0xf1, 0xe3, 0xc6, 0x8d
.byte 0x1a, 0x34, 0x68, 0xd0, 0xa0, 0x40, 0x80, 0x00
#endif
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
;
; ============================================
; R E G I S T E R D E F I N I T I O N S
; ============================================
;
#define mclen r16
#define radlen r17
#define tcnt r17
#define tmp0 r20
#define tmp1 r21
#define cnt0 r22
#define rn r23
#define rate r24
; AEDH = 0b000: for authenticate AD
; AEDH = 0b001: for encryption
; AEDH = 0b011: for decryption
; AEDH = 0b100: for hash
#define AEDH r25
#define rcnt r26
#if (STATE_INBITS==256)
#include "knot256.h"
#elif (STATE_INBITS==384)
#include "knot384.h"
#elif (STATE_INBITS==512)
#include "knot512.h"
#else
#error "Not specified key size and state size"
#endif
#define CRYPTO_BYTES 32
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
.macro PUSH_CONFLICT
push r16
push r17
push r18
push r19
push r23
push r24
push r25
push r26
push r27
push r28
push r29
push r30
push r31
.endm
.macro POP_CONFLICT
pop r31
pop r30
pop r29
pop r28
pop r27
pop r26
pop r25
pop r24
pop r23
pop r19
pop r18
pop r17
pop r16
.endm
.macro PUSH_ALL
push r2
push r3
push r4
push r5
push r6
push r7
push r8
push r9
push r10
push r11
push r12
push r13
push r14
push r15
push r16
push r17
push r28
push r29
.endm
.macro POP_ALL
pop r29
pop r28
pop r17
pop r16
pop r15
pop r14
pop r13
pop r12
pop r11
pop r10
pop r9
pop r8
pop r7
pop r6
pop r5
pop r4
pop r3
pop r2
clr r1
.endm
\ No newline at end of file
#ifndef __CONFIG_H__
#define __CONFIG_H__
//#define CRYPTO_AEAD
#define CRYPTO_HASH
#define MAX_MESSAGE_LENGTH 128
#define STATE_INBITS 384
/* For CRYPTO_AEAD */
#define CRYPTO_KEYBITS 128
/* For CRYPTO_HASH */
#define CRYPTO_BITS 256
#define STATE_INBYTES ((STATE_INBITS + 7) / 8)
#define ROW_INBITS ((STATE_INBITS + 3) / 4)
#define ROW_INBYTES ((ROW_INBITS + 7) / 8)
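/* with STATE_INBITS = 384: STATE_INBYTES = 48, ROW_INBITS = 96, ROW_INBYTES = 12 */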
/* For CRYPTO_AEAD */
#define CRYPTO_KEYBYTES ((CRYPTO_KEYBITS + 7) / 8)
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES CRYPTO_KEYBYTES
#define CRYPTO_ABYTES CRYPTO_KEYBYTES
#define CRYPTO_NOOVERLAP 1
#define MAX_ASSOCIATED_DATA_LENGTH 32
#define MAX_CIPHER_LENGTH (MAX_MESSAGE_LENGTH + CRYPTO_ABYTES)
#define TAG_MATCH 0
#define TAG_UNMATCH -1
#define OTHER_FAILURES -2
/* For CRYPTO_HASH */
#define CRYPTO_BYTES ((CRYPTO_BITS + 7) / 8)
#define DOMAIN_BITS 0x80
#define PAD_BITS 0x01
#define S384_R192_BITS 0x80
#if (STATE_INBITS==256)
#define C1 1
#define C2 8
#define C3 25
#elif (STATE_INBITS==384)
#define C1 1
#define C2 8
#define C3 55
#elif (STATE_INBITS==512)
#define C1 1
#define C2 16
#define C3 25
#else
#error "Not specified state size"
#endif
#ifdef CRYPTO_AEAD
/* For CRYPTO_AEAD */
#define KEY_INBITS (CRYPTO_KEYBYTES * 8)
#define KEY_INBYTES (CRYPTO_KEYBYTES)
#define NONCE_INBITS (CRYPTO_NPUBBYTES * 8)
#define NONCE_INBYTES (CRYPTO_NPUBBYTES)
#define TAG_INBITS (CRYPTO_ABYTES * 8)
#define TAG_INBYTES (CRYPTO_ABYTES)
#if (KEY_INBITS==128) && (STATE_INBITS==256)
#define RATE_INBITS 64
#define NR_0 52
#define NR_i 28
#define NR_f 32
#elif (KEY_INBITS==128) && (STATE_INBITS==384)
#define RATE_INBITS 192
#define NR_0 76
#define NR_i 28
#define NR_f 32
#elif (KEY_INBITS==192) && (STATE_INBITS==384)
#define RATE_INBITS 96
#define NR_0 76
#define NR_i 40
#define NR_f 44
#elif (KEY_INBITS==256) && (STATE_INBITS==512)
#define RATE_INBITS 128
#define NR_0 100
#define NR_i 52
#define NR_f 56
#else
#error "Not specified key size and state size"
#endif
#define RATE_INBYTES ((RATE_INBITS + 7) / 8)
#define SQUEEZE_RATE_INBYTES TAG_INBYTES
#endif
#ifdef CRYPTO_HASH
/* For CRYPTO_HASH */
#define HASH_DIGEST_INBITS (CRYPTO_BYTES * 8)
#if (HASH_DIGEST_INBITS==256) && (STATE_INBITS==256)
#define HASH_RATE_INBITS 32
#define HASH_SQUEEZE_RATE_INBITS 128
#define NR_h 68
#elif (HASH_DIGEST_INBITS==256) && (STATE_INBITS==384)
#define HASH_RATE_INBITS 128
#define HASH_SQUEEZE_RATE_INBITS 128
#define NR_h 80
#elif (HASH_DIGEST_INBITS==384) && (STATE_INBITS==384)
#define HASH_RATE_INBITS 48
#define HASH_SQUEEZE_RATE_INBITS 192
#define NR_h 104
#elif (HASH_DIGEST_INBITS==512) && (STATE_INBITS==512)
#define HASH_RATE_INBITS 64
#define HASH_SQUEEZE_RATE_INBITS 256
#define NR_h 140
#else
#error "Not specified hash digest size and state size"
#endif
#define HASH_RATE_INBYTES ((HASH_RATE_INBITS + 7) / 8)
#define HASH_SQUEEZE_RATE_INBYTES ((HASH_SQUEEZE_RATE_INBITS + 7) / 8)
#endif
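/* active values for this build (CRYPTO_HASH, 256-bit digest on the 384-bit state):
   HASH_RATE_INBYTES = 16, HASH_SQUEEZE_RATE_INBYTES = 16, NR_h = 80 */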
#define TAG_MATCH 0
#define TAG_UNMATCH -1
#define OTHER_FAILURES -2
#endif
\ No newline at end of file
#ifdef __cplusplus
extern "C" {
#endif
int crypto_hash(
unsigned char *out,
const unsigned char *in,
unsigned long long inlen
);
#ifdef __cplusplus
}
#endif
\ No newline at end of file
#include <avr/io.h>
#include <avr/sfr_defs.h>
#include <stdlib.h>
#include <string.h>
#include "config.h"
extern void crypto_aead_encrypt_asm(
unsigned char *c,
const unsigned char *m,
unsigned char mlen,
const unsigned char *ad,
unsigned char adlen,
const unsigned char *npub,
const unsigned char *k
);
extern int crypto_aead_decrypt_asm(
unsigned char *m,
const unsigned char *c,
unsigned char clen,
const unsigned char *ad,
unsigned char adlen,
const unsigned char *npub,
const unsigned char *k
);
extern void crypto_hash_asm(
unsigned char *out,
const unsigned char *in,
unsigned char inlen
);
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
)
{
/*
...
... the code for the cipher implementation goes here,
... generating a ciphertext c[0],c[1],...,c[*clen-1]
... from a plaintext m[0],m[1],...,m[mlen-1]
... and associated data ad[0],ad[1],...,ad[adlen-1]
... and nonce npub[0],npub[1],..
... and secret key k[0],k[1],...
... the implementation shall not use nsec
...
... return 0;
*/
(void)nsec;
crypto_aead_encrypt_asm(c, m, mlen, ad, adlen, npub, k);
*clen = mlen + TAG_INBYTES;
return 0;
}
int crypto_aead_decrypt(
unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
)
{
/*
...
... the code for the AEAD implementation goes here,
... generating a plaintext m[0],m[1],...,m[*mlen-1]
... and secret message number nsec[0],nsec[1],...
... from a ciphertext c[0],c[1],...,c[clen-1]
... and associated data ad[0],ad[1],...,ad[adlen-1]
... and nonce number npub[0],npub[1],...
... and secret key k[0],k[1],...
...
... return 0;
*/
unsigned long long mlen_;
unsigned char tag_is_match;
(void)nsec;
if (clen < CRYPTO_ABYTES) {
return -1;
}
mlen_ = clen - CRYPTO_ABYTES;
tag_is_match = crypto_aead_decrypt_asm(m, c, mlen_, ad, adlen, npub, k);
if (tag_is_match != 0)
{
memset(m, 0, (size_t)mlen_);
return -1;
}
*mlen = mlen_;
return 0;
}
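/*
 * Illustrative sketch only, not part of the implementation: a minimal round trip
 * through the two wrappers above, as it might appear in a test harness on the
 * target. The KNOT_EXAMPLE_SELFTEST guard, the function name and the all-zero
 * key/nonce are assumptions made for this example.
 */
#ifdef KNOT_EXAMPLE_SELFTEST
int knot_example_selftest(void)
{
    unsigned char key[CRYPTO_KEYBYTES]   = { 0 };   /* example key   */
    unsigned char npub[CRYPTO_NPUBBYTES] = { 0 };   /* example nonce */
    unsigned char m[8]  = { 0, 1, 2, 3, 4, 5, 6, 7 };
    unsigned char c[8 + CRYPTO_ABYTES];
    unsigned char m2[8];
    unsigned long long clen = 0, m2len = 0;
    /* the assembly core only reads the least significant length byte (r20),
       so message and AD lengths must stay below 256 in this build */
    crypto_aead_encrypt(c, &clen, m, sizeof(m), NULL, 0, NULL, npub, key);
    if (crypto_aead_decrypt(m2, &m2len, NULL, c, clen, NULL, 0, npub, key) != 0)
        return -1;                       /* tag did not verify */
    return memcmp(m, m2, sizeof(m));     /* 0 on a successful round trip */
}
#endif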
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
;
; ============================================
; S R A M D E F I N I T I O N S
; ============================================
;
#include <avr/io.h>
#include "config.h"
.section .noinit
SRAM_STATE: .BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
#if (STATE_INBYTES > 32)
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
#endif
#if (STATE_INBYTES > 48)
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
#endif
SRAM_MESSAGE_OUT_ADDR: .BYTE 0, 0
SRAM_MESSAGE_IN_ADDR: .BYTE 0, 0
SRAM_MESSAGE_IN_LEN: .BYTE 0, 0
#ifdef CRYPTO_AEAD
; For CRYPTO_AEAD
SRAM_ASSOCIATED_DATA_ADDR: .BYTE 0, 0
SRAM_ADLEN: .BYTE 0, 0
SRAM_NONCE_ADDR: .BYTE 0, 0
SRAM_KEY_ADDR: .BYTE 0, 0
SRAM_ADDITIONAL:
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
#if (CRYPTO_ABYTES > 16)
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
#endif
#if (CRYPTO_ABYTES > 24)
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
#endif
#endif
.section .text
#include "permutation.h"
; require YH:YL to be the address of the current message/ciphertext input block
; for enc and dec, the XORed result (ciphertext or plaintext) is written out via Z
; require ZH:ZL to be the address of the current output block
.macro XOR_to_State_ENCDEC
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
mov cnt0, rate
XOR_to_State_loop_ENCDEC:
ld tmp0, Y+ ; plaintext/ciphertext
ld tmp1, X ; state
eor tmp1, tmp0 ; ciphertext/plaintext
st Z+, tmp1 ; store ciphertext/plaintext
sbrc AEDH, 1 ; test auth/enc or dec, if AEDH[1] == 0, skip replacing the state byte
mov tmp1, tmp0 ; if dec, replace state
st X+, tmp1 ; store state byte
dec cnt0
brne XOR_to_State_loop_ENCDEC
; YH:YL now point to the next message/ciphertext block
.endm
; require YH:YL to be the address of the current associated data block
; (the same routine is reused by hashing to absorb message blocks)
; the block is XORed into the state in place; no output is written
.macro XOR_to_State_AUTH
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
mov cnt0, rate
XOR_to_State_loop_AUTH:
ld tmp0, Y+ ; plaintext/ciphertext
ld tmp1, X ; state
eor tmp1, tmp0 ; ciphertext/plaintext
st X+, tmp1 ; store state byte
dec cnt0
brne XOR_to_State_loop_AUTH
; YH:YL are now the address of the next associated data block
.endm
; require YH:YL to point to the input data
; require ZH:ZL to point to the output data
; require cnt0 to contain the number of bytes in the source data
; the number of bytes in the source data must be less than rate, i.e., 0 <= cnt0 < rate
;
; bit 0 of AEDH distinguishes (auth AD) from (enc/dec M/C):
; AEDH[0] = 0 for (auth AD), AEDH[0] = 1 for (enc/dec M/C)
; bit 1 of AEDH distinguishes (auth AD/enc M) from (dec C):
; AEDH[1] = 0 for (auth AD/enc M), AEDH[1] = 1 for (dec C)
; AEDH = 0b000 for (auth AD)
; AEDH = 0b001 for (enc M)
; AEDH = 0b011 for (dec C)
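; e.g. with AEDH = 0b011 (dec C) both sbrc tests below find their bit set, so the
; XORed byte is written out as plaintext and the state byte is replaced with the
; received ciphertext byte; with AEDH = 0b000 (auth AD) both conditional
; instructions are skipped and the block is only absorbed into the state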
Pad_XOR_to_State:
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
tst cnt0
breq XOR_padded_data
XOR_source_data_loop:
ld tmp0, Y+ ; plaintext/ciphertext
ld tmp1, X ; state
eor tmp1, tmp0 ; ciphertext/plaintext
sbrc AEDH, 0 ; test auth or enc/dec, if AEDH[0] == 0, skip store result
st Z+, tmp1 ; store ciphertext/plaintext
sbrc AEDH, 1 ; test auth/enc or dec, if AEDH[1] == 0, skip replacing the state byte
mov tmp1, tmp0 ; if dec, replace state
st X+, tmp1 ; store state byte
dec cnt0
brne XOR_source_data_loop
XOR_padded_data:
ldi tmp0, PAD_BITS
ld tmp1, X
eor tmp1, tmp0
st X, tmp1
ret
AddDomain:
ldi XH, hi8(SRAM_STATE + STATE_INBYTES - 1)
ldi XL, lo8(SRAM_STATE + STATE_INBYTES - 1)
ldi tmp0, DOMAIN_BITS
ld tmp1, X
eor tmp0, tmp1
st X, tmp0
ret
; require ZH:ZL be the address of the destination
EXTRACT_from_State:
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
mov tmp1, rate
EXTRACT_from_State_loop:
ld tmp0, X+
st Z+, tmp0
dec tmp1
brne EXTRACT_from_State_loop
ret
AUTH:
tst radlen
breq AUTH_end
cp radlen, rate
brlo auth_ad_padded_block
auth_ad_loop:
XOR_to_State_AUTH
rcall Permutation
sub radlen, rate
cp radlen, rate
brlo auth_ad_padded_block
rjmp auth_ad_loop
auth_ad_padded_block:
mov cnt0, radlen
rcall Pad_XOR_to_State
rcall Permutation
AUTH_end:
ret
#ifdef CRYPTO_AEAD
Initialization:
ldi rn, NR_0
ldi XL, lo8(SRAM_STATE)
ldi XH, hi8(SRAM_STATE)
lds YH, SRAM_NONCE_ADDR
lds YL, SRAM_NONCE_ADDR + 1
ldi cnt0, CRYPTO_NPUBBYTES
load_nonce_loop:
ld tmp0, Y+
st X+, tmp0
dec cnt0
brne load_nonce_loop
lds YH, SRAM_KEY_ADDR
lds YL, SRAM_KEY_ADDR + 1
ldi cnt0, CRYPTO_KEYBYTES
load_key_loop:
ld tmp0, Y+
st X+, tmp0
dec cnt0
brne load_key_loop
#if (STATE_INBITS==384) && (RATE_INBITS==192)
ldi cnt0, (STATE_INBYTES - CRYPTO_NPUBBYTES - CRYPTO_KEYBYTES - 1)
clr tmp0
empty_state_loop:
st X+, tmp0
dec cnt0
brne empty_state_loop
ldi tmp0, S384_R192_BITS
st X+, tmp0
#endif
rcall Permutation
ret
ENC:
tst mclen
breq ENC_end
cp mclen, rate
brlo enc_padded_block
enc_loop:
XOR_to_State_ENCDEC
ldi rn, NR_i
rcall Permutation
sub mclen, rate
cp mclen, rate
brlo enc_padded_block
rjmp enc_loop
enc_padded_block:
mov cnt0, mclen
rcall Pad_XOR_to_State
ENC_end:
ret
Finalization:
ldi rate, SQUEEZE_RATE_INBYTES
ldi rn, NR_f
rcall Permutation
rcall EXTRACT_from_State
ret
; void crypto_aead_encrypt_asm(
; unsigned char *c,
; const unsigned char *m,
; unsigned long long mlen,
; const unsigned char *ad,
; unsigned long long adlen,
; const unsigned char *npub,
; const unsigned char *k
; )
;
; unsigned char *c, is passed in r24:r25
; const unsigned char *m, is passed in r22:r23
; unsigned long long mlen, is passed in r20:r21, only LSB (r20) is used
; const unsigned char *ad, is passed in r18:r19
; unsigned long long adlen, is passed in r16:r17, only LSB (r16) is used
; const unsigned char *npub, is passed in r14:r15
; const unsigned char *k is passed in r12:r13
.global crypto_aead_encrypt_asm
crypto_aead_encrypt_asm:
PUSH_ALL
ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
st X+, r25 ;store cipher address in SRAM_MESSAGE_OUT_ADDR
st X+, r24
st X+, r23 ;store message address in SRAM_MESSAGE_IN_ADDR
st X+, r22
st X+, r21 ;store message length in SRAM_MESSAGE_IN_LEN
st X+, r20
st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR
st X+, r18
st X+, r17 ;store associated data length in SRAM_ADLEN
st X+, r16
st X+, r15 ;store nonce address in SRAM_NONCE_ADDR
st X+, r14
st X+, r13 ;store key address in SRAM_KEY_ADDR
st X+, r12
mov radlen, r16
mov mclen, r20
rcall Initialization
ldi rn, NR_i
ldi rate, RATE_INBYTES
ldi AEDH, 0b000 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
lds YH, SRAM_ASSOCIATED_DATA_ADDR
lds YL, SRAM_ASSOCIATED_DATA_ADDR + 1
rcall AUTH
rcall AddDomain
ldi AEDH, 0b001 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
lds YH, SRAM_MESSAGE_IN_ADDR
lds YL, SRAM_MESSAGE_IN_ADDR + 1
lds ZH, SRAM_MESSAGE_OUT_ADDR
lds ZL, SRAM_MESSAGE_OUT_ADDR + 1
rcall ENC
rcall Finalization
POP_ALL
ret
; int crypto_aead_decrypt_asm(
; unsigned char *m,
; const unsigned char *c,
; unsigned long long clen,
; const unsigned char *ad,
; unsigned long long adlen,
; const unsigned char *npub,
; const unsigned char *k
; )
;
; unsigned char *m, is passed in r24:r25
; const unsigned char *c, is passed in r22:r23
; unsigned long long clen, is passed in r20:r21, only LSB (r20) is used
; const unsigned char *ad, is passed in r18:r19
; unsigned long long adlen, is passed in r16:r17, only LSB (r16) is used
; const unsigned char *npub, is passed in r14:r15
; const unsigned char *k is passed in r12:r13
.global crypto_aead_decrypt_asm
crypto_aead_decrypt_asm:
PUSH_ALL
ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
st X+, r25 ;store message address in SRAM_MESSAGE_OUT_ADDR
st X+, r24
st X+, r23 ;store cipher address in SRAM_MESSAGE_IN_ADDR
st X+, r22
st X+, r21 ;store cipher length in SRAM_MESSAGE_IN_LEN
st X+, r20
st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR
st X+, r18
st X+, r17 ;store associated data length in SRAM_ADLEN
st X+, r16
st X+, r15 ;store nonce address in SRAM_NONCE_ADDR
st X+, r14
st X+, r13 ;store key address in SRAM_KEY_ADDR
st X+, r12
mov radlen, r16
mov mclen, r20
rcall Initialization
ldi rn, NR_i
ldi rate, RATE_INBYTES
ldi AEDH, 0b000 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
lds YH, SRAM_ASSOCIATED_DATA_ADDR
lds YL, SRAM_ASSOCIATED_DATA_ADDR + 1
rcall AUTH
rcall AddDomain
ldi AEDH, 0b011 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
lds YH, SRAM_MESSAGE_IN_ADDR
lds YL, SRAM_MESSAGE_IN_ADDR + 1
lds ZH, SRAM_MESSAGE_OUT_ADDR
lds ZL, SRAM_MESSAGE_OUT_ADDR + 1
rcall ENC
ldi ZH, hi8(SRAM_ADDITIONAL)
ldi ZL, lo8(SRAM_ADDITIONAL)
rcall Finalization
sbiw ZL, CRYPTO_ABYTES
ldi cnt0, CRYPTO_ABYTES
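; compare the recomputed tag (squeezed into SRAM_ADDITIONAL, now addressed by Z)
; with the received tag that follows the ciphertext (addressed by Y); the loop
; branches out on the first mismatching byte; r25:r24 is set to 0 on a match and
; to 0xFFFF (-1) otherwise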
compare_tag:
ld tmp0, Z+
ld tmp1, Y+
cp tmp0, tmp1
brne return_tag_not_match
dec cnt0
brne compare_tag
rjmp return_tag_match
return_tag_not_match:
ldi r25, 0xFF
ldi r24, 0xFF
rjmp crypto_aead_decrypt_end
return_tag_match:
clr r25
clr r24
crypto_aead_decrypt_end:
POP_ALL
ret
; #ifdef CRYPTO_AEAD
#endif
#ifdef CRYPTO_HASH
; void crypto_hash_asm(
; unsigned char *out,
; const unsigned char *in,
; unsigned long long inlen
; )
;
; unsigned char *out, is passed in r24:r25
; const unsigned char *in, is passed in r22:r23
; unsigned long long inlen, is passed in r20:r21, only LSB (r20) is used
.global crypto_hash_asm
crypto_hash_asm:
PUSH_ALL
ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
st X+, r25 ;store message address in SRAM_MESSAGE_OUT_ADDR
st X+, r24
st X+, r23 ;store cipher address in SRAM_MESSAGE_IN_ADDR
st X+, r22
st X+, r21 ;store cipher length in SRAM_MESSAGE_IN_LEN
st X+, r20
mov mclen, r20
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
#if (STATE_INBITS==384) && (HASH_RATE_INBITS==128)
ldi cnt0, STATE_INBYTES - 1
#else
ldi cnt0, STATE_INBYTES
#endif
clr tmp0
zero_state:
st X+, tmp0
dec cnt0
brne zero_state
#if (STATE_INBITS==384) && (HASH_RATE_INBITS==128)
ldi tmp0, S384_R192_BITS
st X+, tmp0
#endif
ldi rn, NR_h
ldi AEDH, 0b100
HASH_ABSORBING:
mov radlen, mclen
tst radlen
breq EMPTY_M
ldi rate, HASH_RATE_INBYTES
lds YH, SRAM_MESSAGE_IN_ADDR
lds YL, SRAM_MESSAGE_IN_ADDR + 1
rcall AUTH
rjmp HASH_SQUEEZING
EMPTY_M:
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
ldi tmp0, PAD_BITS
ld tmp1, X
eor tmp1, tmp0
st X, tmp1
rcall Permutation
HASH_SQUEEZING:
ldi rate, HASH_SQUEEZE_RATE_INBYTES
lds ZH, SRAM_MESSAGE_OUT_ADDR
lds ZL, SRAM_MESSAGE_OUT_ADDR + 1
ldi tcnt, CRYPTO_BYTES
SQUEEZING_loop:
rcall EXTRACT_from_State
subi tcnt, HASH_SQUEEZE_RATE_INBYTES
breq HASH_SQUEEZING_end
rcall Permutation
rjmp SQUEEZING_loop
HASH_SQUEEZING_end:
POP_ALL
ret
#endif
; Byte Order In AVR 8:
; KNOT-AEAD(128, 256, 64):
; N[ 0] AEAD_State[ 0] | Message[ 0] Perm_row_0[0] 0 Tag[ 0]
; N[ 1] AEAD_State[ 1] | Message[ 1] Perm_row_0[1] 0 Tag[ 1]
; N[ 2] AEAD_State[ 2] | Message[ 2] Perm_row_0[2] 0 Tag[ 2]
; N[ 3] AEAD_State[ 3] | Message[ 3] Perm_row_0[3] 0 Tag[ 3]
; N[ 4] AEAD_State[ 4] | Message[ 4] 0x01 Perm_row_0[4] 0 Tag[ 4]
; N[ 5] AEAD_State[ 5] | Message[ 5] 0x00 Perm_row_0[5] 0 Tag[ 5]
; N[ 6] AEAD_State[ 6] | Message[ 6] 0x00 Perm_row_0[6] 0 Tag[ 6]
; N[ 7] AEAD_State[ 7] | Message[ 7] 0x00 Perm_row_0[7] <<< 0 Tag[ 7]
; N[ 8] AEAD_State[ 8] | Perm_row_1[0] 1
; N[ 9] AEAD_State[ 9] | Perm_row_1[1] 1
; N[10] AEAD_State[10] | Perm_row_1[2] 1
; N[11] AEAD_State[11] | Perm_row_1[3] 1
; N[12] AEAD_State[12] | Perm_row_1[4] 1
; N[13] AEAD_State[13] | Perm_row_1[5] 1
; N[14] AEAD_State[14] | Perm_row_1[6] 1
; N[15] AEAD_State[15] | Perm_row_1[7] <<< 1
; K[ 0] AEAD_State[16] | Perm_row_2[0] 8
; K[ 1] AEAD_State[17] | Perm_row_2[1] 8
; K[ 2] AEAD_State[18] | Perm_row_2[2] 8
; K[ 3] AEAD_State[19] | Perm_row_2[3] 8
; K[ 4] AEAD_State[20] | Perm_row_2[4] 8
; K[ 5] AEAD_State[21] | Perm_row_2[5] 8
; K[ 6] AEAD_State[22] | Perm_row_2[6] 8
; K[ 7] AEAD_State[23] | Perm_row_2[7] <<< 8
; K[ 8] AEAD_State[24] | Perm_row_3[0] 25
; K[ 9] AEAD_State[25] | Perm_row_3[1] 25
; K[10] AEAD_State[26] | Perm_row_3[2] 25
; K[11] AEAD_State[27] | Perm_row_3[3] 25
; K[12] AEAD_State[28] | Perm_row_3[4] 25
; K[13] AEAD_State[29] | Perm_row_3[5] 25
; K[14] AEAD_State[30] | Perm_row_3[6] 25
; K[15] AEAD_State[31] | ^0x80 Perm_row_3[7] <<< 25
;
;
; KNOT-AEAD(128, 384, 192):
; Initialization
; N[ 0] AEAD_State[ 0] | Message[ 0] Perm_row_0[ 0] 0 Tag[ 0]
; N[ 1] AEAD_State[ 1] | Message[ 1] Perm_row_0[ 1] 0 Tag[ 1]
; N[ 2] AEAD_State[ 2] | Message[ 2] Perm_row_0[ 2] 0 Tag[ 2]
; N[ 3] AEAD_State[ 3] | Message[ 3] Perm_row_0[ 3] 0 Tag[ 3]
; N[ 4] AEAD_State[ 4] | Message[ 4] 0x01 Perm_row_0[ 4] 0 Tag[ 4]
; N[ 5] AEAD_State[ 5] | Message[ 5] 0x00 Perm_row_0[ 5] 0 Tag[ 5]
; N[ 6] AEAD_State[ 6] | Message[ 6] 0x00 Perm_row_0[ 6] 0 Tag[ 6]
; N[ 7] AEAD_State[ 7] | Message[ 7] 0x00 Perm_row_0[ 7] 0 Tag[ 7]
; N[ 8] AEAD_State[ 8] | Message[ 8] 0x00 Perm_row_0[ 8] 0 Tag[ 8]
; N[ 9] AEAD_State[ 9] | Message[ 9] 0x00 Perm_row_0[ 9] 0 Tag[ 9]
; N[10] AEAD_State[10] | Message[10] 0x00 Perm_row_0[10] 0 Tag[10]
; N[11] AEAD_State[11] | Message[11] 0x00 Perm_row_0[11] <<< 0 Tag[11]
; N[12] AEAD_State[12] | Message[12] 0x00 Perm_row_1[ 0] 1 Tag[12]
; N[13] AEAD_State[13] | Message[13] 0x00 Perm_row_1[ 1] 1 Tag[13]
; N[14] AEAD_State[14] | Message[14] 0x00 Perm_row_1[ 2] 1 Tag[14]
; N[15] AEAD_State[15] | Message[15] 0x00 Perm_row_1[ 3] 1 Tag[15]
; K[ 0] AEAD_State[16] | Message[16] 0x00 Perm_row_1[ 4] 1
; K[ 1] AEAD_State[17] | Message[17] 0x00 Perm_row_1[ 5] 1
; K[ 2] AEAD_State[18] | Message[18] 0x00 Perm_row_1[ 6] 1
; K[ 3] AEAD_State[19] | Message[19] 0x00 Perm_row_1[ 7] 1
; K[ 4] AEAD_State[20] | Message[20] 0x00 Perm_row_1[ 8] 1
; K[ 5] AEAD_State[21] | Message[21] 0x00 Perm_row_1[ 9] 1
; K[ 6] AEAD_State[22] | Message[22] 0x00 Perm_row_1[10] 1
; K[ 7] AEAD_State[23] | Message[23] 0x00 Perm_row_1[11] <<< 1
; K[ 8] AEAD_State[24] | Perm_row_2[ 0] 8
; K[ 9] AEAD_State[25] | Perm_row_2[ 1] 8
; K[10] AEAD_State[26] | Perm_row_2[ 2] 8
; K[11] AEAD_State[27] | Perm_row_2[ 3] 8
; K[12] AEAD_State[28] | Perm_row_2[ 4] 8
; K[13] AEAD_State[29] | Perm_row_2[ 5] 8
; K[14] AEAD_State[30] | Perm_row_2[ 6] 8
; K[15] AEAD_State[31] | Perm_row_2[ 7] 8
; 0x00 AEAD_State[32] | Perm_row_2[ 8] 8
; 0x00 AEAD_State[33] | Perm_row_2[ 9] 8
; 0x00 AEAD_State[34] | Perm_row_2[10] 8
; 0x00 AEAD_State[35] | Perm_row_2[11] <<< 8
; 0x00 AEAD_State[36] | Perm_row_3[ 0] 55
; 0x00 AEAD_State[37] | Perm_row_3[ 1] 55
; 0x00 AEAD_State[38] | Perm_row_3[ 2] 55
; 0x00 AEAD_State[39] | Perm_row_3[ 3] 55
; 0x00 AEAD_State[40] | Perm_row_3[ 4] 55
; 0x00 AEAD_State[41] | Perm_row_3[ 5] 55
; 0x00 AEAD_State[42] | Perm_row_3[ 6] 55
; 0x00 AEAD_State[43] | Perm_row_3[ 7] 55
; 0x00 AEAD_State[44] | Perm_row_3[ 8] 55
; 0x00 AEAD_State[45] | Perm_row_3[ 9] 55
; 0x00 AEAD_State[46] | Perm_row_3[10] 55
; 0x00 ^0x80 AEAD_State[47] | ^0x80 Perm_row_3[11] <<< 55
#include <avr/io.h>
#include <avr/sfr_defs.h>
#include <stdlib.h>
#include <string.h>
#include "api.h"
#include "crypto_hash.h"
extern void crypto_hash_asm(
unsigned char *out,
const unsigned char *in,
unsigned char inlen
);
int crypto_hash(
unsigned char *out,
const unsigned char *in,
unsigned long long inlen
)
{
/*
...
... the code for the hash function implementation goes here
... generating a hash value out[0],out[1],...,out[CRYPTO_BYTES-1]
... from a message in[0],in[1],...,in[inlen-1]
...
... return 0;
*/
crypto_hash_asm(out, in, inlen);
return 0;
}
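/*
 * Illustrative sketch only: hashing a short message with the wrapper above.
 * The KNOT_EXAMPLE_HASH guard and the function name are assumptions made for
 * this example; CRYPTO_BYTES comes from api.h.
 */
#ifdef KNOT_EXAMPLE_HASH
int knot_example_hash(void)
{
    static const unsigned char msg[3] = { 'a', 'b', 'c' };
    unsigned char digest[CRYPTO_BYTES];
    /* the assembly core only reads the least significant length byte (r20),
       so inlen must stay below 256 in this build */
    return crypto_hash(digest, msg, sizeof(msg));
}
#endif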
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
#define x10 r0
#define x11 r1
#define x12 r2
#define x13 r3
#define x14 r4
#define x15 r5
#define x16 r6
#define x17 r7
; an intentional arrangement of registers to facilitate movw
#define x20 r8
#define x21 r10
#define x22 r12
#define x23 r14
#define x24 r9
#define x25 r11
#define x26 r13
#define x27 r15
; an intentional arrangement of registers to facilitate movw
#define x30 r16
#define x35 r18
#define x32 r20
#define x37 r22
#define x34 r17
#define x31 r19
#define x36 r21
#define x33 r23
#define t0j r24
#define t1j r25
#define x0j r27
#include "assist.h"
.macro Sbox i0, i1, i2, i3
mov t0j, \i1
com \i0
and \i1, \i0
eor \i1, \i2
or \i2, t0j
eor \i0, \i3
eor \i2, \i0
eor t0j, \i3
and \i0, \i1
eor \i3, \i1
eor \i0, t0j
and t0j, \i2
eor \i1, t0j
.endm
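; bit-sliced S-box layer: each invocation applies the cipher's 4-bit S-box to the
; eight columns held bit-sliced in i0..i3 (one bit of every column per register),
; updating all four slices in place; t0j is clobbered as scratch and \i0 is
; complemented with com as the first step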
Permutation:
PUSH_CONFLICT
mov rcnt, rn
ldi YH, hi8(SRAM_STATE + ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + ROW_INBYTES)
ld x10, Y+
ld x11, Y+
ld x12, Y+
ld x13, Y+
ld x14, Y+
ld x15, Y+
ld x16, Y+
ld x17, Y+
ld x20, Y+
ld x21, Y+
ld x22, Y+
ld x23, Y+
ld x24, Y+
ld x25, Y+
ld x26, Y+
ld x27, Y+
ld x30, Y+
ld x31, Y+
ld x32, Y+
ld x33, Y+
ld x34, Y+
ld x35, Y+
ld x36, Y+
ld x37, Y+
#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[2] = 1 for HASH
rjmp For_Hash
For_AEAD:
ldi ZL, lo8(RC_LFSR6)
ldi ZH, hi8(RC_LFSR6)
rjmp round_loop_start
For_Hash:
ldi ZL, lo8(RC_LFSR7)
ldi ZH, hi8(RC_LFSR7)
#elif defined(CRYPTO_AEAD)
ldi ZL, lo8(RC_LFSR6)
ldi ZH, hi8(RC_LFSR6)
#else
ldi ZL, lo8(RC_LFSR7)
ldi ZH, hi8(RC_LFSR7)
#endif
round_loop_start:
; AddRC
lpm t0j, Z+
ldi YH, hi8(SRAM_STATE)
ldi YL, lo8(SRAM_STATE)
ld x0j, Y
eor x0j, t0j
; SubColumns
Sbox x0j, x10, x20, x30
st Y+, x0j
ld x0j, Y
Sbox x0j, x11, x21, x31
st Y+, x0j
ld x0j, Y
Sbox x0j, x12, x22, x32
st Y+, x0j
ld x0j, Y
Sbox x0j, x13, x23, x33
st Y+, x0j
ld x0j, Y
Sbox x0j, x14, x24, x34
st Y+, x0j
ld x0j, Y
Sbox x0j, x15, x25, x35
st Y+, x0j
ld x0j, Y
Sbox x0j, x16, x26, x36
st Y+, x0j
ld x0j, Y
Sbox x0j, x17, x27, x37
st Y, x0j
; ShiftRows
; <<< 1
mov t0j, x17
rol t0j
rol x10
rol x11
rol x12
rol x13
rol x14
rol x15
rol x16
rol x17
; <<< 8
; 7 6 5 4 3 2 1 0 => 6 5 4 3 2 1 0 7
;mov t0j, x27
;mov x27, x26
;mov x26, x25
;mov x25, x24
;mov x24, x23
;mov x23, x22
;mov x22, x21
;mov x21, x20
;mov x20, t0j
; an intentional arrangement of registers to facilitate movw
movw t0j, x23 ; t1j:t0j <= x27:x23
movw x23, x22 ; x27:x23 <= x26:x22
movw x22, x21 ; x26:x22 <= x25:x21
movw x21, x20 ; x25:x21 <= x24:x20
mov x20, t1j ; x20 <= x27
mov x24, t0j ; x24 <= x23
; <<< 1
mov t0j, x37
rol t0j
rol x30
rol x31
rol x32
rol x33
rol x34
rol x35
rol x36
rol x37
; <<< 24
; 7 6 5 4 3 2 1 0 => 4 3 2 1 0 7 6 5
;mov t0j, x30
;mov x30, x35
;mov x35, x32
;mov x32, x37
;mov x37, x34
;mov x34, x31
;mov x31, x36
;mov x36, x33
;mov x33, t0j
; an intentional arrangement of registers to facilitate movw
;x30 r16
;x35 r18
;x32 r20
;x37 r22
;x34 r17
;x31 r19
;x36 r21
;x33 r23
movw t0j, x30 ; t1j:t0j <= x34:x30
movw x30, x35 ; x34:x30 <= x31:x35
movw x35, x32 ; x31:x35 <= x36:x32
movw x32, x37 ; x36:x32 <= x33:x37
mov x37, t1j ; x37 <= x34
mov x33, t0j ; x33 <= x30
dec rcnt
breq round_loop_end
jmp round_loop_start
round_loop_end:
ldi YH, hi8(SRAM_STATE + ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + ROW_INBYTES)
st Y+, x10
st Y+, x11
st Y+, x12
st Y+, x13
st Y+, x14
st Y+, x15
st Y+, x16
st Y+, x17
st Y+, x20
st Y+, x21
st Y+, x22
st Y+, x23
st Y+, x24
st Y+, x25
st Y+, x26
st Y+, x27
st Y+, x30
st Y+, x31
st Y+, x32
st Y+, x33
st Y+, x34
st Y+, x35
st Y+, x36
st Y+, x37
POP_CONFLICT
ret
.section .text
#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
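; the round-constant tables below hold successive states of a small LFSR:
; the AEAD schedule of this variant steps a 6-bit LFSR and the hash schedule a
; 7-bit one; the 6-bit sequence appears to follow
;   rc' = ((rc << 1) | (bit5(rc) ^ bit4(rc))) & 0x3F   starting from rc = 0x01,
; and the 7-bit sequence
;   rc' = ((rc << 1) | (bit6(rc) ^ bit5(rc))) & 0x7F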
RC_LFSR6:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x21, 0x03, 0x06
.byte 0x0c, 0x18, 0x31, 0x22, 0x05, 0x0a, 0x14, 0x29
.byte 0x13, 0x27, 0x0f, 0x1e, 0x3d, 0x3a, 0x34, 0x28
.byte 0x11, 0x23, 0x07, 0x0e, 0x1c, 0x39, 0x32, 0x24
.byte 0x09, 0x12, 0x25, 0x0b, 0x16, 0x2d, 0x1b, 0x37
.byte 0x2e, 0x1d, 0x3b, 0x36, 0x2c, 0x19, 0x33, 0x26
.byte 0x0d, 0x1a, 0x35, 0x2a, 0x15, 0x2b, 0x17, 0x2f
.byte 0x1f, 0x3f, 0x3e, 0x3c, 0x38, 0x30, 0x20, 0x00
RC_LFSR7:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03
.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a
.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c
.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b
.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a
.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f
.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43
.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09
.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36
.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37
.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31
.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25
.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c
.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a
.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f
.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00
#elif defined(CRYPTO_AEAD)
RC_LFSR6:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x21, 0x03, 0x06
.byte 0x0c, 0x18, 0x31, 0x22, 0x05, 0x0a, 0x14, 0x29
.byte 0x13, 0x27, 0x0f, 0x1e, 0x3d, 0x3a, 0x34, 0x28
.byte 0x11, 0x23, 0x07, 0x0e, 0x1c, 0x39, 0x32, 0x24
.byte 0x09, 0x12, 0x25, 0x0b, 0x16, 0x2d, 0x1b, 0x37
.byte 0x2e, 0x1d, 0x3b, 0x36, 0x2c, 0x19, 0x33, 0x26
.byte 0x0d, 0x1a, 0x35, 0x2a, 0x15, 0x2b, 0x17, 0x2f
.byte 0x1f, 0x3f, 0x3e, 0x3c, 0x38, 0x30, 0x20, 0x00
#else
RC_LFSR7:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03
.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a
.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c
.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b
.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a
.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f
.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43
.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09
.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36
.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37
.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31
.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25
.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c
.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a
.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f
.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00
#endif
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
; an intentional arrangement of registers to facilitate movw
#define x20 r0
#define x21 r2
#define x22 r4
#define x23 r6
#define x24 r8
#define x25 r10
#define x26 r1
#define x27 r3
#define x28 r5
#define x29 r7
#define x2a r9
#define x2b r11
; an intentional arrangement of registers to facilitate movw
#define x30 r22
#define x35 r20
#define x3a r18
#define x33 r16
#define x38 r14
#define x31 r12
#define x36 r23
#define x3b r21
#define x34 r19
#define x39 r17
#define x32 r15
#define x37 r13
#define t0j r24
#define t1j r25
#define x0j r25
#define x1j r27
#include "assist.h"
.macro Sbox i0, i1, i2, i3
ldi t0j, 0xFF
eor \i0, t0j
mov t0j, \i1
and \i1, \i0
eor \i1, \i2
or \i2, t0j
eor \i0, \i3
eor \i2, \i0
eor t0j, \i3
and \i0, \i1
eor \i3, \i1
eor \i0, t0j
and t0j, \i2
eor \i1, t0j
.endm
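; note: \i0 is complemented via eor with 0xFF rather than com, which leaves the
; carry flag untouched; the Row-1 <<< 1 rotation is chained through the carry
; flag across the successive OneColumn invocations below, so the S-box must not
; clobber it (avr com would force the carry to 1)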
.macro OneColumn i0, i1, i2, i3
ld \i0, Y
ldd \i1, Y + ROW_INBYTES
Sbox \i0, \i1, \i2, \i3
st Y+, \i0
rol \i1 ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES -1, \i1
.endm
Permutation:
PUSH_CONFLICT
mov rcnt, rn
ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES)
ld x20, Y+
ld x21, Y+
ld x22, Y+
ld x23, Y+
ld x24, Y+
ld x25, Y+
ld x26, Y+
ld x27, Y+
ld x28, Y+
ld x29, Y+
ld x2a, Y+
ld x2b, Y+
ld x30, Y+
ld x31, Y+
ld x32, Y+
ld x33, Y+
ld x34, Y+
ld x35, Y+
ld x36, Y+
ld x37, Y+
ld x38, Y+
ld x39, Y+
ld x3a, Y+
ld x3b, Y+
ldi ZL, lo8(RC_LFSR7)
ldi ZH, hi8(RC_LFSR7)
round_loop_start:
; AddRC
lpm t0j, Z+
ldi YH, hi8(SRAM_STATE)
ldi YL, lo8(SRAM_STATE)
ld x0j, Y
eor x0j, t0j
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, x20, x30
st Y+, x0j
lsl x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES -1, x1j
OneColumn x0j, x1j, x21, x31
OneColumn x0j, x1j, x22, x32
OneColumn x0j, x1j, x23, x33
OneColumn x0j, x1j, x24, x34
OneColumn x0j, x1j, x25, x35
OneColumn x0j, x1j, x26, x36
OneColumn x0j, x1j, x27, x37
OneColumn x0j, x1j, x28, x38
OneColumn x0j, x1j, x29, x39
OneColumn x0j, x1j, x2a, x3a
OneColumn x0j, x1j, x2b, x3b
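; fold the carry left over from the last Row-1 rol into bit 0 of Row-1 byte 0
; (eor clears t0j without touching the carry flag), completing the <<< 1
; rotation of Row 1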
ld x1j, Y
eor t0j, t0j
adc x1j, t0j
st Y, x1j
; b a 9 8 7 6 5 4 3 2 1 0
; -- -- -- -- -- -- -- -- -- -- -- x- 0
; -- -- -- -- -- -- -- -- -- -- -- x' 0
; -- -- -- -- -- -- -- -- -- -- x- -- 1
; -- -- -- -- x' -- -- -- -- -- -- -- 7
; 4 3 2 1 0 b a 9 8 7 6 5
; ShiftRows -- the last two rows
; <<< 8
; b a 9 8 7 6 5 4 3 2 1 0 => a 9 8 7 6 5 4 3 2 1 0 b
movw t0j, x25 ; t1j:t0j <= x2b:x25
movw x25, x24 ; x2b:x25 <= x2a:x24
movw x24, x23 ; x2a:x24 <= x29:x23
movw x23, x22 ; x29:x23 <= x28:x22
movw x22, x21 ; x28:x22 <= x27:x21
movw x21, x20 ; x27:x21 <= x26:x20
mov x26, t0j ; x26 <= x25
mov x20, t1j ; x20 <= x2b
; >>> 1
mov t0j, x3b
ror t0j
ror x3a
ror x39
ror x38
ror x37
ror x36
ror x35
ror x34
ror x33
ror x32
ror x31
ror x30
ror x3b
; <<< 56
; b a 9 8 7 6 5 4 3 2 1 0 => 4 3 2 1 0 b a 9 8 7 6 5
; mov x3j, x30
; mov x30, x35
; mov x35, x3a
; mov x3a, x33
; mov x33, x38
; mov x38, x31
; mov x31, x36
; mov x36, x3b
; mov x3b, x34
; mov x34, x39
; mov x39, x32
; mov x32, x37
; mov x37, x3j
; an intentional arrangement of registers to facilitate movw
; x30 r22
; x35 r20
; x3a r18
; x33 r16
; x38 r14
; x31 r12
; x36 r23
; x3b r21
; x34 r19
; x39 r17
; x32 r15
; x37 r13
movw t0j, x30 ; t1j:t0j <= x36:x30
movw x30, x35 ; x36:x30 <= x3b:x35
movw x35, x3a ; x3b:x35 <= x34:x3a
movw x3a, x33 ; x34:x3a <= x39:x33
movw x33, x38 ; x39:x33 <= x32:x38
movw x38, x31 ; x32:x38 <= x37:x31
mov x31, t1j ; x31 <= x36
mov x37, t0j ; x37 <= x30
dec rcnt
breq round_loop_end
jmp round_loop_start
round_loop_end:
ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES)
st Y+, x20
st Y+, x21
st Y+, x22
st Y+, x23
st Y+, x24
st Y+, x25
st Y+, x26
st Y+, x27
st Y+, x28
st Y+, x29
st Y+, x2a
st Y+, x2b
st Y+, x30
st Y+, x31
st Y+, x32
st Y+, x33
st Y+, x34
st Y+, x35
st Y+, x36
st Y+, x37
st Y+, x38
st Y+, x39
st Y+, x3a
st Y+, x3b
POP_CONFLICT
ret
RC_LFSR7:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03
.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a
.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c
.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b
.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a
.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f
.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43
.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09
.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36
.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37
.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31
.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25
.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c
.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a
.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f
.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
#define x20 r0
#define x22 r2
#define x24 r4
#define x26 r6
#define x28 r1
#define x2a r3
#define x2c r5
#define x2e r7
#define x30 r8
#define x3d r10
#define x3a r12
#define x37 r14
#define x34 r16
#define x31 r18
#define x3e r20
#define x3b r22
#define x38 r9
#define x35 r11
#define x32 r13
#define x3f r15
#define x3c r17
#define x39 r19
#define x36 r21
#define x33 r23
#define t0j r24
#define t1j r25
#define x0j r25
#define x1j r27
#define x2j r26
#include "assist.h"
.macro Sbox i0, i1, i2, i3
ldi t0j, 0xFF
eor \i0, t0j
mov t0j, \i1
and \i1, \i0
eor \i1, \i2
or \i2, t0j
eor \i0, \i3
eor \i2, \i0
eor t0j, \i3
and \i0, \i1
eor \i3, \i1
eor \i0, t0j
and t0j, \i2
eor \i1, t0j
.endm
.macro TwoColumns i2_e, i3_e, i3_o
; column 2i
ld x0j, Y
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, \i2_e, \i3_e
st Y+, x0j
rol x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES - 1, x1j
; column 2i+1
ld x0j, Y
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, x2j, \i3_o
st Y+, x0j
rol x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES - 1, x1j
ldd t0j, Y + 2 * ROW_INBYTES + 1
std Y + 2 * ROW_INBYTES + 1, x2j
mov x2j, t0j
.endm
Permutation:
PUSH_CONFLICT
mov rcnt, rn
push rcnt
ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES)
ldd x20, Y + 0x00
ldd x22, Y + 0x02
ldd x24, Y + 0x04
ldd x26, Y + 0x06
ldd x28, Y + 0x08
ldd x2a, Y + 0x0a
ldd x2c, Y + 0x0c
ldd x2e, Y + 0x0e
adiw YL, ROW_INBYTES
ld x30, Y+
ld x31, Y+
ld x32, Y+
ld x33, Y+
ld x34, Y+
ld x35, Y+
ld x36, Y+
ld x37, Y+
ld x38, Y+
ld x39, Y+
ld x3a, Y+
ld x3b, Y+
ld x3c, Y+
ld x3d, Y+
ld x3e, Y+
ld x3f, Y+
#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[2] = 1 for HASH
rjmp For_Hash
For_AEAD:
ldi ZL, lo8(RC_LFSR7)
ldi ZH, hi8(RC_LFSR7)
rjmp round_loop_start
For_Hash:
ldi ZL, lo8(RC_LFSR8)
ldi ZH, hi8(RC_LFSR8)
#elif defined(CRYPTO_AEAD)
ldi ZL, lo8(RC_LFSR7)
ldi ZH, hi8(RC_LFSR7)
#else
ldi ZL, lo8(RC_LFSR8)
ldi ZH, hi8(RC_LFSR8)
#endif
round_loop_start:
; AddRC
lpm t0j, Z+
ldi YH, hi8(SRAM_STATE)
ldi YL, lo8(SRAM_STATE)
; column 0
ld x0j, Y
eor x0j, t0j
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, x20, x30
st Y+, x0j
lsl x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES - 1, x1j
; column 1
ld x0j, Y
ldd x1j, Y + ROW_INBYTES
ldd x2j, Y + 2 * ROW_INBYTES
Sbox x0j, x1j, x2j, x31
st Y+, x0j
rol x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES - 1, x1j
ldd t0j, Y + 2 * ROW_INBYTES + 1
std Y + 2 * ROW_INBYTES + 1, x2j
mov x2j, t0j
; column 2, 3
TwoColumns x22, x32, x33
; column 4, 5
TwoColumns x24, x34, x35
; column 6, 7
TwoColumns x26, x36, x37
; column 8, 9
TwoColumns x28, x38, x39
; column 10, 11
TwoColumns x2a, x3a, x3b
; column 12, 13
TwoColumns x2c, x3c, x3d
; column 14
ld x0j, Y
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, x2e, x3e
st Y+, x0j
rol x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES - 1, x1j
; column 15
ld x0j, Y
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, x2j, x3f
st Y+, x0j
rol x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES - 1, x1j
ld x1j, Y
eor t0j, t0j
adc x1j, t0j
st Y, x1j
std Y + ROW_INBYTES + 1, x2j
; f e d c b a 9 8 7 6 5 4 3 2 1 0
; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x- 0
; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x' 0
; -- -- -- -- -- -- -- -- -- -- -- -- -- x- -- -- 2
; -- -- -- -- -- -- -- -- -- -- -- -- x' -- -- -- 3
; c b a 9 8 7 6 5 4 3 2 1 0 f e d
; x2e x2c x2a x28 x26 x24 x22 x20 => x2c x2a x28 x26 x24 x22 x20 x2e
;mov t0j, x2e
;mov x2e, x2c
;mov x2c, x2a
;mov x2a, x28
;mov x28, x26
;mov x26, x24
;mov x24, x22
;mov x22, x20
;mov x20, t0j
; an intentional arrangement of registers to facilitate movw
movw t0j, x26 ; t1j:t0j <= x2e:x26
movw x26, x24 ; x2e:x26 <= x2c:x24
movw x24, x22 ; x2c:x24 <= x2a:x22
movw x22, x20 ; x2a:x22 <= x28:x20
mov x20, t1j ; x20 <= x2e
mov x28, t0j ; x28 <= x26
; <<< 1
mov t0j, x3f
rol t0j
rol x30
rol x31
rol x32
rol x33
rol x34
rol x35
rol x36
rol x37
rol x38
rol x39
rol x3a
rol x3b
rol x3c
rol x3d
rol x3e
rol x3f
; <<< 24
; f e d c b a 9 8 7 6 5 4 3 2 1 0 =>
; c b a 9 8 7 6 5 4 3 2 1 0 f e d
; mov x3j, x30
; mov x30, x3d
; mov x3d, x3a
; mov x3a, x37
; mov x37, x34
; mov x34, x31
; mov x31, x3e
; mov x3e, x3b
; mov x3b, x38
; mov x38, x35
; mov x35, x32
; mov x32, x3f
; mov x3f, x3c
; mov x3c, x39
; mov x39, x36
; mov x36, x33
; mov x33, x3j
; an intentional arrangement of registers to facilitate movw
; x30 r8
; x3d r10
; x3a r12
; x37 r14
; x34 r16
; x31 r18
; x3e r20
; x3b r22
; x38 r9
; x35 r11
; x32 r13
; x3f r15
; x3c r17
; x39 r19
; x36 r21
; x33 r23
movw t0j, x30 ; t1j:t0j <= x38:x30
movw x30, x3d ; x38:x30 <= x35:x3d
movw x3d, x3a ; x35:x3d <= x32:x3a
movw x3a, x37 ; x32:x3a <= x3f:x37
movw x37, x34 ; x3f:x37 <= x3c:x34
movw x34, x31 ; x3c:x34 <= x39:x31
movw x31, x3e ; x39:x31 <= x36:x3e
movw x3e, x3b ; x36:x3e <= x33:x3b
mov x3b, t1j ; x3b <= x38
mov x33, t0j ; x33 <= x30
pop rcnt
dec rcnt
push rcnt
breq round_loop_end
rjmp round_loop_start
round_loop_end:
pop rcnt
ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES)
std Y + 0x00, x20
std Y + 0x02, x22
std Y + 0x04, x24
std Y + 0x06, x26
std Y + 0x08, x28
std Y + 0x0a, x2a
std Y + 0x0c, x2c
std Y + 0x0e, x2e
adiw YL, ROW_INBYTES
st Y+, x30
st Y+, x31
st Y+, x32
st Y+, x33
st Y+, x34
st Y+, x35
st Y+, x36
st Y+, x37
st Y+, x38
st Y+, x39
st Y+, x3a
st Y+, x3b
st Y+, x3c
st Y+, x3d
st Y+, x3e
st Y+, x3f
POP_CONFLICT
ret
.section .text
#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
RC_LFSR7:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03
.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a
.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c
.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b
.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a
.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f
.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43
.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09
.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36
.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37
.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31
.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25
.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c
.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a
.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f
.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00
RC_LFSR8:
.byte 0x01, 0x02, 0x04, 0x08, 0x11, 0x23, 0x47, 0x8e
.byte 0x1c, 0x38, 0x71, 0xe2, 0xc4, 0x89, 0x12, 0x25
.byte 0x4b, 0x97, 0x2e, 0x5c, 0xb8, 0x70, 0xe0, 0xc0
.byte 0x81, 0x03, 0x06, 0x0c, 0x19, 0x32, 0x64, 0xc9
.byte 0x92, 0x24, 0x49, 0x93, 0x26, 0x4d, 0x9b, 0x37
.byte 0x6e, 0xdc, 0xb9, 0x72, 0xe4, 0xc8, 0x90, 0x20
.byte 0x41, 0x82, 0x05, 0x0a, 0x15, 0x2b, 0x56, 0xad
.byte 0x5b, 0xb6, 0x6d, 0xda, 0xb5, 0x6b, 0xd6, 0xac
.byte 0x59, 0xb2, 0x65, 0xcb, 0x96, 0x2c, 0x58, 0xb0
.byte 0x61, 0xc3, 0x87, 0x0f, 0x1f, 0x3e, 0x7d, 0xfb
.byte 0xf6, 0xed, 0xdb, 0xb7, 0x6f, 0xde, 0xbd, 0x7a
.byte 0xf5, 0xeb, 0xd7, 0xae, 0x5d, 0xba, 0x74, 0xe8
.byte 0xd1, 0xa2, 0x44, 0x88, 0x10, 0x21, 0x43, 0x86
.byte 0x0d, 0x1b, 0x36, 0x6c, 0xd8, 0xb1, 0x63, 0xc7
.byte 0x8f, 0x1e, 0x3c, 0x79, 0xf3, 0xe7, 0xce, 0x9c
.byte 0x39, 0x73, 0xe6, 0xcc, 0x98, 0x31, 0x62, 0xc5
.byte 0x8b, 0x16, 0x2d, 0x5a, 0xb4, 0x69, 0xd2, 0xa4
.byte 0x48, 0x91, 0x22, 0x45, 0x8a, 0x14, 0x29, 0x52
.byte 0xa5, 0x4a, 0x95, 0x2a, 0x54, 0xa9, 0x53, 0xa7
.byte 0x4e, 0x9d, 0x3b, 0x77, 0xee, 0xdd, 0xbb, 0x76
.byte 0xec, 0xd9, 0xb3, 0x67, 0xcf, 0x9e, 0x3d, 0x7b
.byte 0xf7, 0xef, 0xdf, 0xbf, 0x7e, 0xfd, 0xfa, 0xf4
.byte 0xe9, 0xd3, 0xa6, 0x4c, 0x99, 0x33, 0x66, 0xcd
.byte 0x9a, 0x35, 0x6a, 0xd4, 0xa8, 0x51, 0xa3, 0x46
.byte 0x8c, 0x18, 0x30, 0x60, 0xc1, 0x83, 0x07, 0x0e
.byte 0x1d, 0x3a, 0x75, 0xea, 0xd5, 0xaa, 0x55, 0xab
.byte 0x57, 0xaf, 0x5f, 0xbe, 0x7c, 0xf9, 0xf2, 0xe5
.byte 0xca, 0x94, 0x28, 0x50, 0xa1, 0x42, 0x84, 0x09
.byte 0x13, 0x27, 0x4f, 0x9f, 0x3f, 0x7f, 0xff, 0xfe
.byte 0xfc, 0xf8, 0xf0, 0xe1, 0xc2, 0x85, 0x0b, 0x17
.byte 0x2f, 0x5e, 0xbc, 0x78, 0xf1, 0xe3, 0xc6, 0x8d
.byte 0x1a, 0x34, 0x68, 0xd0, 0xa0, 0x40, 0x80, 0x00
#elif defined(CRYPTO_AEAD)
RC_LFSR7:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03
.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a
.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c
.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b
.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a
.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f
.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43
.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09
.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36
.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37
.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31
.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25
.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c
.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a
.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f
.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00
#else
RC_LFSR8:
.byte 0x01, 0x02, 0x04, 0x08, 0x11, 0x23, 0x47, 0x8e
.byte 0x1c, 0x38, 0x71, 0xe2, 0xc4, 0x89, 0x12, 0x25
.byte 0x4b, 0x97, 0x2e, 0x5c, 0xb8, 0x70, 0xe0, 0xc0
.byte 0x81, 0x03, 0x06, 0x0c, 0x19, 0x32, 0x64, 0xc9
.byte 0x92, 0x24, 0x49, 0x93, 0x26, 0x4d, 0x9b, 0x37
.byte 0x6e, 0xdc, 0xb9, 0x72, 0xe4, 0xc8, 0x90, 0x20
.byte 0x41, 0x82, 0x05, 0x0a, 0x15, 0x2b, 0x56, 0xad
.byte 0x5b, 0xb6, 0x6d, 0xda, 0xb5, 0x6b, 0xd6, 0xac
.byte 0x59, 0xb2, 0x65, 0xcb, 0x96, 0x2c, 0x58, 0xb0
.byte 0x61, 0xc3, 0x87, 0x0f, 0x1f, 0x3e, 0x7d, 0xfb
.byte 0xf6, 0xed, 0xdb, 0xb7, 0x6f, 0xde, 0xbd, 0x7a
.byte 0xf5, 0xeb, 0xd7, 0xae, 0x5d, 0xba, 0x74, 0xe8
.byte 0xd1, 0xa2, 0x44, 0x88, 0x10, 0x21, 0x43, 0x86
.byte 0x0d, 0x1b, 0x36, 0x6c, 0xd8, 0xb1, 0x63, 0xc7
.byte 0x8f, 0x1e, 0x3c, 0x79, 0xf3, 0xe7, 0xce, 0x9c
.byte 0x39, 0x73, 0xe6, 0xcc, 0x98, 0x31, 0x62, 0xc5
.byte 0x8b, 0x16, 0x2d, 0x5a, 0xb4, 0x69, 0xd2, 0xa4
.byte 0x48, 0x91, 0x22, 0x45, 0x8a, 0x14, 0x29, 0x52
.byte 0xa5, 0x4a, 0x95, 0x2a, 0x54, 0xa9, 0x53, 0xa7
.byte 0x4e, 0x9d, 0x3b, 0x77, 0xee, 0xdd, 0xbb, 0x76
.byte 0xec, 0xd9, 0xb3, 0x67, 0xcf, 0x9e, 0x3d, 0x7b
.byte 0xf7, 0xef, 0xdf, 0xbf, 0x7e, 0xfd, 0xfa, 0xf4
.byte 0xe9, 0xd3, 0xa6, 0x4c, 0x99, 0x33, 0x66, 0xcd
.byte 0x9a, 0x35, 0x6a, 0xd4, 0xa8, 0x51, 0xa3, 0x46
.byte 0x8c, 0x18, 0x30, 0x60, 0xc1, 0x83, 0x07, 0x0e
.byte 0x1d, 0x3a, 0x75, 0xea, 0xd5, 0xaa, 0x55, 0xab
.byte 0x57, 0xaf, 0x5f, 0xbe, 0x7c, 0xf9, 0xf2, 0xe5
.byte 0xca, 0x94, 0x28, 0x50, 0xa1, 0x42, 0x84, 0x09
.byte 0x13, 0x27, 0x4f, 0x9f, 0x3f, 0x7f, 0xff, 0xfe
.byte 0xfc, 0xf8, 0xf0, 0xe1, 0xc2, 0x85, 0x0b, 0x17
.byte 0x2f, 0x5e, 0xbc, 0x78, 0xf1, 0xe3, 0xc6, 0x8d
.byte 0x1a, 0x34, 0x68, 0xd0, 0xa0, 0x40, 0x80, 0x00
#endif
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
;
; ============================================
; R E G I S T E R D E F I N I T I O N S
; ============================================
;
#define mclen r16
#define radlen r17
#define tcnt r17
#define tmp0 r20
#define tmp1 r21
#define cnt0 r22
#define rn r23
#define rate r24
; AEDH = 0b000: for authenticate AD
; AEDH = 0b001: for encryption
; AEDH = 0b011: for decryption
; AEDH = 0b100: for hash
#define AEDH r25
#define rcnt r26
#if (STATE_INBITS==256)
#include "knot256.h"
#elif (STATE_INBITS==384)
#include "knot384.h"
#elif (STATE_INBITS==512)
#include "knot512.h"
#else
#error "Not specified key size and state size"
#endif
#define CRYPTO_BYTES 48
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
.macro PUSH_CONFLICT
push r16
push r17
push r18
push r19
push r23
push r24
push r25
push r26
push r27
push r28
push r29
push r30
push r31
.endm
.macro POP_CONFLICT
pop r31
pop r30
pop r29
pop r28
pop r27
pop r26
pop r25
pop r24
pop r23
pop r19
pop r18
pop r17
pop r16
.endm
.macro PUSH_ALL
push r2
push r3
push r4
push r5
push r6
push r7
push r8
push r9
push r10
push r11
push r12
push r13
push r14
push r15
push r16
push r17
push r28
push r29
.endm
.macro POP_ALL
pop r29
pop r28
pop r17
pop r16
pop r15
pop r14
pop r13
pop r12
pop r11
pop r10
pop r9
pop r8
pop r7
pop r6
pop r5
pop r4
pop r3
pop r2
clr r1
.endm
\ No newline at end of file
#ifndef __CONFIG_H__
#define __CONFIG_H__
//#define CRYPTO_AEAD
#define CRYPTO_HASH
#define MAX_MESSAGE_LENGTH 128
#define STATE_INBITS 384
/* For CRYPTO_AEAD */
#define CRYPTO_KEYBITS 192
/* For CRYPTO_HASH */
#define CRYPTO_BITS 384
#define STATE_INBYTES ((STATE_INBITS + 7) / 8)
#define ROW_INBITS ((STATE_INBITS + 3) / 4)
#define ROW_INBYTES ((ROW_INBITS + 7) / 8)
/* For CRYPTO_AEAD */
#define CRYPTO_KEYBYTES ((CRYPTO_KEYBITS + 7) / 8)
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES CRYPTO_KEYBYTES
#define CRYPTO_ABYTES CRYPTO_KEYBYTES
#define CRYPTO_NOOVERLAP 1
#define MAX_ASSOCIATED_DATA_LENGTH 32
#define MAX_CIPHER_LENGTH (MAX_MESSAGE_LENGTH + CRYPTO_ABYTES)
#define TAG_MATCH 0
#define TAG_UNMATCH -1
#define OTHER_FAILURES -2
/* For CRYPTO_HASH */
#define CRYPTO_BYTES ((CRYPTO_BITS + 7) / 8)
#define DOMAIN_BITS 0x80
#define PAD_BITS 0x01
#define S384_R192_BITS 0x80
#if (STATE_INBITS==256)
#define C1 1
#define C2 8
#define C3 25
#elif (STATE_INBITS==384)
#define C1 1
#define C2 8
#define C3 55
#elif (STATE_INBITS==512)
#define C1 1
#define C2 16
#define C3 25
#else
#error "Not specified state size"
#endif
#ifdef CRYPTO_AEAD
/* For CRYPTO_AEAD */
#define KEY_INBITS (CRYPTO_KEYBYTES * 8)
#define KEY_INBYTES (CRYPTO_KEYBYTES)
#define NONCE_INBITS (CRYPTO_NPUBBYTES * 8)
#define NONCE_INBYTES (CRYPTO_NPUBBYTES)
#define TAG_INBITS (CRYPTO_ABYTES * 8)
#define TAG_INBYTES (CRYPTO_ABYTES)
#if (KEY_INBITS==128) && (STATE_INBITS==256)
#define RATE_INBITS 64
#define NR_0 52
#define NR_i 28
#define NR_f 32
#elif (KEY_INBITS==128) && (STATE_INBITS==384)
#define RATE_INBITS 192
#define NR_0 76
#define NR_i 28
#define NR_f 32
#elif (KEY_INBITS==192) && (STATE_INBITS==384)
#define RATE_INBITS 96
#define NR_0 76
#define NR_i 40
#define NR_f 44
#elif (KEY_INBITS==256) && (STATE_INBITS==512)
#define RATE_INBITS 128
#define NR_0 100
#define NR_i 52
#define NR_f 56
#else
#error "Not specified key size and state size"
#endif
#define RATE_INBYTES ((RATE_INBITS + 7) / 8)
#define SQUEEZE_RATE_INBYTES TAG_INBYTES
#endif
#ifdef CRYPTO_HASH
/* For CRYPTO_HASH */
#define HASH_DIGEST_INBITS (CRYPTO_BYTES * 8)
#if (HASH_DIGEST_INBITS==256) && (STATE_INBITS==256)
#define HASH_RATE_INBITS 32
#define HASH_SQUEEZE_RATE_INBITS 128
#define NR_h 68
#elif (HASH_DIGEST_INBITS==256) && (STATE_INBITS==384)
#define HASH_RATE_INBITS 128
#define HASH_SQUEEZE_RATE_INBITS 128
#define NR_h 80
#elif (HASH_DIGEST_INBITS==384) && (STATE_INBITS==384)
#define HASH_RATE_INBITS 48
#define HASH_SQUEEZE_RATE_INBITS 192
#define NR_h 104
#elif (HASH_DIGEST_INBITS==512) && (STATE_INBITS==512)
#define HASH_RATE_INBITS 64
#define HASH_SQUEEZE_RATE_INBITS 256
#define NR_h 140
#else
#error "Not specified hash digest size and state size"
#endif
#define HASH_RATE_INBYTES ((HASH_RATE_INBITS + 7) / 8)
#define HASH_SQUEEZE_RATE_INBYTES ((HASH_SQUEEZE_RATE_INBITS + 7) / 8)
#endif
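/* active values for this build (CRYPTO_HASH, 384-bit digest on the 384-bit state):
   HASH_RATE_INBYTES = 6, HASH_SQUEEZE_RATE_INBYTES = 24, NR_h = 104 */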
#define TAG_MATCH 0
#define TAG_UNMATCH -1
#define OTHER_FAILURES -2
#endif
\ No newline at end of file
#ifdef __cplusplus
extern "C" {
#endif
int crypto_hash(
unsigned char *out,
const unsigned char *in,
unsigned long long inlen
);
#ifdef __cplusplus
}
#endif
\ No newline at end of file
#include <avr/io.h>
#include <avr/sfr_defs.h>
#include <stdlib.h>
#include <string.h>
#include "config.h"
extern void crypto_aead_encrypt_asm(
unsigned char *c,
const unsigned char *m,
unsigned char mlen,
const unsigned char *ad,
unsigned char adlen,
const unsigned char *npub,
const unsigned char *k
);
extern int crypto_aead_decrypt_asm(
unsigned char *m,
const unsigned char *c,
unsigned char clen,
const unsigned char *ad,
unsigned char adlen,
const unsigned char *npub,
const unsigned char *k
);
extern void crypto_hash_asm(
unsigned char *out,
const unsigned char *in,
unsigned char inlen
);
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
)
{
/*
...
... the code for the cipher implementation goes here,
... generating a ciphertext c[0],c[1],...,c[*clen-1]
... from a plaintext m[0],m[1],...,m[mlen-1]
... and associated data ad[0],ad[1],...,ad[adlen-1]
... and nonce npub[0],npub[1],..
... and secret key k[0],k[1],...
... the implementation shall not use nsec
...
... return 0;
*/
(void)nsec;
crypto_aead_encrypt_asm(c, m, mlen, ad, adlen, npub, k);
*clen = mlen + TAG_INBYTES;
return 0;
}
int crypto_aead_decrypt(
unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
)
{
/*
...
... the code for the AEAD implementation goes here,
... generating a plaintext m[0],m[1],...,m[*mlen-1]
... and secret message number nsec[0],nsec[1],...
... from a ciphertext c[0],c[1],...,c[clen-1]
... and associated data ad[0],ad[1],...,ad[adlen-1]
... and nonce number npub[0],npub[1],...
... and secret key k[0],k[1],...
...
... return 0;
*/
unsigned long long mlen_;
unsigned char tag_is_match;
(void)nsec;
if (clen < CRYPTO_ABYTES) {
return -1;
}
mlen_ = clen - CRYPTO_ABYTES;
tag_is_match = crypto_aead_decrypt_asm(m, c, mlen_, ad, adlen, npub, k);
if (tag_is_match != 0)
{
memset(m, 0, (size_t)mlen_);
return -1;
}
*mlen = mlen_;
return 0;
}
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
;
; ============================================
; S R A M D E F I N I T I O N S
; ============================================
;
#include <avr/io.h>
#include "config.h"
.section .noinit
SRAM_STATE: .BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
#if (STATE_INBYTES > 32)
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
#endif
#if (STATE_INBYTES > 48)
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
#endif
SRAM_MESSAGE_OUT_ADDR: .BYTE 0, 0
SRAM_MESSAGE_IN_ADDR: .BYTE 0, 0
SRAM_MESSAGE_IN_LEN: .BYTE 0, 0
#ifdef CRYPTO_AEAD
; For CRYPTO_AEAD
SRAM_ASSOCIATED_DATA_ADDR: .BYTE 0, 0
SRAM_ADLEN: .BYTE 0, 0
SRAM_NONCE_ADDR: .BYTE 0, 0
SRAM_KEY_ADDR: .BYTE 0, 0
SRAM_ADDITIONAL:
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
#if (CRYPTO_ABYTES > 16)
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
#endif
#if (CRYPTO_ABYTES > 24)
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
#endif
#endif
.section .text
#include "permutation.h"
; require YH:YL to be the address of the current message/ciphertext input block
; for enc and dec, the XORed result (ciphertext or plaintext) is written out via Z
; require ZH:ZL to be the address of the current output block
.macro XOR_to_State_ENCDEC
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
mov cnt0, rate
XOR_to_State_loop_ENCDEC:
ld tmp0, Y+ ; plaintext/ciphertext
ld tmp1, X ; state
eor tmp1, tmp0 ; ciphertext/plaintext
st Z+, tmp1 ; store ciphertext/plaintext
sbrc AEDH, 1 ; test auth/enc or dec, if AEDH[1] == 0, skip replacing the state byte
mov tmp1, tmp0 ; if dec, replace state
st X+, tmp1 ; store state byte
dec cnt0
brne XOR_to_State_loop_ENCDEC
; YH:YL now point to the next message/ciphertext block
.endm
; require YH:YL to be the address of the current associated data block
; (the same routine is reused by hashing to absorb message blocks)
; the block is XORed into the state in place; no output is written
.macro XOR_to_State_AUTH
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
mov cnt0, rate
XOR_to_State_loop_AUTH:
ld tmp0, Y+ ; plaintext/ciphertext
ld tmp1, X ; state
eor tmp1, tmp0 ; ciphertext/plaintext
st X+, tmp1 ; store state byte
dec cnt0
brne XOR_to_State_loop_AUTH
; YH:YL are now the address of the next associated data block
.endm
; require YH:YL to point to the input data
; require ZH:ZL to point to the output data
; require cnt0 to contain the number of bytes in the source data
; the number of bytes in the source data must be less than rate, i.e., 0 <= cnt0 < rate
;
; bit 0 of AEDH distinguishes (auth AD) from (enc/dec M/C):
; AEDH[0] = 0 for (auth AD), AEDH[0] = 1 for (enc/dec M/C)
; bit 1 of AEDH distinguishes (auth AD/enc M) from (dec C):
; AEDH[1] = 0 for (auth AD/enc M), AEDH[1] = 1 for (dec C)
; AEDH = 0b000 for (auth AD)
; AEDH = 0b001 for (enc M)
; AEDH = 0b011 for (dec C)
Pad_XOR_to_State:
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
tst cnt0
breq XOR_padded_data
XOR_source_data_loop:
ld tmp0, Y+ ; plaintext/ciphertext
ld tmp1, X ; state
eor tmp1, tmp0 ; ciphertext/plaintext
sbrc AEDH, 0 ; test auth or enc/dec, if AEDH[0] == 0, skip store result
st Z+, tmp1 ; store ciphertext/plaintext
sbrc AEDH, 1 ; test auth/enc or dec, if AEDH[1] == 0, skip replacing the state byte
mov tmp1, tmp0 ; if dec, replace state
st X+, tmp1 ; store state byte
dec cnt0
brne XOR_source_data_loop
XOR_padded_data:
ldi tmp0, PAD_BITS
ld tmp1, X
eor tmp1, tmp0
st X, tmp1
ret
AddDomain:
ldi XH, hi8(SRAM_STATE + STATE_INBYTES - 1)
ldi XL, lo8(SRAM_STATE + STATE_INBYTES - 1)
ldi tmp0, DOMAIN_BITS
ld tmp1, X
eor tmp0, tmp1
st X, tmp0
ret
; require ZH:ZL be the address of the destination
EXTRACT_from_State:
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
mov tmp1, rate
EXTRACT_from_State_loop:
ld tmp0, X+
st Z+, tmp0
dec tmp1
brne EXTRACT_from_State_loop
ret
AUTH:
tst radlen
breq AUTH_end
cp radlen, rate
brlo auth_ad_padded_block
auth_ad_loop:
XOR_to_State_AUTH
rcall Permutation
sub radlen, rate
cp radlen, rate
brlo auth_ad_padded_block
rjmp auth_ad_loop
auth_ad_padded_block:
mov cnt0, radlen
rcall Pad_XOR_to_State
rcall Permutation
AUTH_end:
ret
#ifdef CRYPTO_AEAD
Initialization:
ldi rn, NR_0
ldi XL, lo8(SRAM_STATE)
ldi XH, hi8(SRAM_STATE)
lds YH, SRAM_NONCE_ADDR
lds YL, SRAM_NONCE_ADDR + 1
ldi cnt0, CRYPTO_NPUBBYTES
load_nonce_loop:
ld tmp0, Y+
st X+, tmp0
dec cnt0
brne load_nonce_loop
lds YH, SRAM_KEY_ADDR
lds YL, SRAM_KEY_ADDR + 1
ldi cnt0, CRYPTO_KEYBYTES
load_key_loop:
ld tmp0, Y+
st X+, tmp0
dec cnt0
brne load_key_loop
#if (STATE_INBITS==384) && (RATE_INBITS==192)
ldi cnt0, (STATE_INBYTES - CRYPTO_NPUBBYTES - CRYPTO_KEYBYTES - 1)
clr tmp0
empty_state_loop:
st X+, tmp0
dec cnt0
brne empty_state_loop
ldi tmp0, S384_R192_BITS
st X+, tmp0
#endif
rcall Permutation
ret
ENC:
tst mclen
breq ENC_end
cp mclen, rate
brlo enc_padded_block
enc_loop:
XOR_to_State_ENCDEC
ldi rn, NR_i
rcall Permutation
sub mclen, rate
cp mclen, rate
brlo enc_padded_block
rjmp enc_loop
enc_padded_block:
mov cnt0, mclen
rcall Pad_XOR_to_State
ENC_end:
ret
Finalization:
ldi rate, SQUEEZE_RATE_INBYTES
ldi rn, NR_f
rcall Permutation
rcall EXTRACT_from_State
ret
; void crypto_aead_encrypt_asm(
; unsigned char *c,
; const unsigned char *m,
; unsigned long long mlen,
; const unsigned char *ad,
; unsigned long long adlen,
; const unsigned char *npub,
; const unsigned char *k
; )
;
; unsigned char *c, is passed in r24:r25
; const unsigned char *m, is passed in r22:r23
; unsigned long long mlen, is passed in r20:r21, only LSB (r20) is used
; const unsigned char *ad, is passed in r18:r19
; unsigned long long adlen, is passed in r16:r17, only LSB (r16) is used
; const unsigned char *npub, is passed in r14:r15
; const unsigned char *k is passed in r12:r13
.global crypto_aead_encrypt_asm
crypto_aead_encrypt_asm:
PUSH_ALL
ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
st X+, r25 ;store cipher address in SRAM_MESSAGE_OUT_ADDR
st X+, r24
st X+, r23 ;store message address in SRAM_MESSAGE_IN_ADDR
st X+, r22
st X+, r21 ;store message length in SRAM_MESSAGE_IN_LEN
st X+, r20
st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR
st X+, r18
st X+, r17 ;store associated data length in SRAM_ADLEN
st X+, r16
st X+, r15 ;store nonce address in SRAM_NONCE_ADDR
st X+, r14
st X+, r13 ;store key address in SRAM_KEY_ADDR
st X+, r12
mov radlen, r16
mov mclen, r20
rcall Initialization
ldi rn, NR_i
ldi rate, RATE_INBYTES
ldi AEDH, 0b000 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
lds YH, SRAM_ASSOCIATED_DATA_ADDR
lds YL, SRAM_ASSOCIATED_DATA_ADDR + 1
rcall AUTH
rcall AddDomain
ldi AEDH, 0b001 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
lds YH, SRAM_MESSAGE_IN_ADDR
lds YL, SRAM_MESSAGE_IN_ADDR + 1
lds ZH, SRAM_MESSAGE_OUT_ADDR
lds ZL, SRAM_MESSAGE_OUT_ADDR + 1
rcall ENC
rcall Finalization
POP_ALL
ret
; int crypto_aead_decrypt_asm(
; unsigned char *m,
; const unsigned char *c,
; unsigned long long clen,
; const unsigned char *ad,
; unsigned long long adlen,
; const unsigned char *npub,
; const unsigned char *k
; )
;
; unsigned char *m, is passed in r24:r25
; const unsigned char *c, is passed in r22:r23
; unsigned long long clen, is passed in r20:r21, only LSB (r20) is used
; const unsigned char *ad, is passed in r18:r19
; unsigned long long adlen, is passed in r16:r17, only LSB (r16) is used
; const unsigned char *npub, is passed in r14:r15
; const unsigned char *k is passed in r12:r13
.global crypto_aead_decrypt_asm
crypto_aead_decrypt_asm:
PUSH_ALL
ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
st X+, r25 ;store message address in SRAM_MESSAGE_OUT_ADDR
st X+, r24
st X+, r23 ;store cipher address in SRAM_MESSAGE_IN_ADDR
st X+, r22
st X+, r21 ;store cipher length in SRAM_MESSAGE_IN_LEN
st X+, r20
st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR
st X+, r18
st X+, r17 ;store associated data length in SRAM_ADLEN
st X+, r16
st X+, r15 ;store nonce address in SRAM_NONCE_ADDR
st X+, r14
st X+, r13 ;store key address in SRAM_KEY_ADDR
st X+, r12
mov radlen, r16
mov mclen, r20
rcall Initialization
ldi rn, NR_i
ldi rate, RATE_INBYTES
ldi AEDH, 0b000 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
lds YH, SRAM_ASSOCIATED_DATA_ADDR
lds YL, SRAM_ASSOCIATED_DATA_ADDR + 1
rcall AUTH
rcall AddDomain
ldi AEDH, 0b011 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
lds YH, SRAM_MESSAGE_IN_ADDR
lds YL, SRAM_MESSAGE_IN_ADDR + 1
lds ZH, SRAM_MESSAGE_OUT_ADDR
lds ZL, SRAM_MESSAGE_OUT_ADDR + 1
rcall ENC
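; squeeze the computed tag into SRAM_ADDITIONAL, then compare it byte by byte
; against the received tag that follows the ciphertext (still addressed by Y)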
ldi ZH, hi8(SRAM_ADDITIONAL)
ldi ZL, lo8(SRAM_ADDITIONAL)
rcall Finalization
sbiw ZL, CRYPTO_ABYTES
ldi cnt0, CRYPTO_ABYTES
compare_tag:
ld tmp0, Z+
ld tmp1, Y+
cp tmp0, tmp1
brne return_tag_not_match
dec cnt0
brne compare_tag
rjmp return_tag_match
return_tag_not_match:
ldi r25, 0xFF
ldi r24, 0xFF
rjmp crypto_aead_decrypt_end
return_tag_match:
clr r25
clr r24
crypto_aead_decrypt_end:
POP_ALL
ret
; #ifdef CRYPTO_AEAD
#endif
#ifdef CRYPTO_HASH
; void crypto_hash_asm(
; unsigned char *out,
; const unsigned char *in,
; unsigned long long inlen
; )
;
; unsigned char *out, is passed in r24:r25
; const unsigned char *in, is passed in r22:r23
; unsigned long long inlen, is passed in r20:r21, only LSB (r20) is used
.global crypto_hash_asm
crypto_hash_asm:
PUSH_ALL
ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
st X+, r25 ;store digest output address in SRAM_MESSAGE_OUT_ADDR
st X+, r24
st X+, r23 ;store message address in SRAM_MESSAGE_IN_ADDR
st X+, r22
st X+, r21 ;store message length in SRAM_MESSAGE_IN_LEN
st X+, r20
mov mclen, r20
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
#if (STATE_INBITS==384) && (HASH_RATE_INBITS==128)
ldi cnt0, STATE_INBYTES - 1
#else
ldi cnt0, STATE_INBYTES
#endif
clr tmp0
zero_state:
st X+, tmp0
dec cnt0
brne zero_state
#if (STATE_INBITS==384) && (HASH_RATE_INBITS==128)
ldi tmp0, S384_R192_BITS
st X+, tmp0
#endif
ldi rn, NR_h
ldi AEDH, 0b100
HASH_ABSORBING:
mov radlen, mclen
tst radlen
breq EMPTY_M
ldi rate, HASH_RATE_INBYTES
lds YH, SRAM_MESSAGE_IN_ADDR
lds YL, SRAM_MESSAGE_IN_ADDR + 1
rcall AUTH
rjmp HASH_SQUEEZING
EMPTY_M:
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
ldi tmp0, PAD_BITS
ld tmp1, X
eor tmp1, tmp0
st X, tmp1
rcall Permutation
HASH_SQUEEZING:
ldi rate, HASH_SQUEEZE_RATE_INBYTES
lds ZH, SRAM_MESSAGE_OUT_ADDR
lds ZL, SRAM_MESSAGE_OUT_ADDR + 1
ldi tcnt, CRYPTO_BYTES
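; squeeze CRYPTO_BYTES of digest, HASH_SQUEEZE_RATE_INBYTES bytes per extraction,
; applying the permutation between extractions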
SQUEEZING_loop:
rcall EXTRACT_from_State
subi tcnt, HASH_SQUEEZE_RATE_INBYTES
breq HASH_SQUEEZING_end
rcall Permutation
rjmp SQUEEZING_loop
HASH_SQUEEZING_end:
POP_ALL
ret
#endif
; Byte Order In AVR 8:
; KNOT-AEAD(128, 256, 64):
; N[ 0] AEAD_State[ 0] | Message[ 0] Perm_row_0[0] 0 Tag[ 0]
; N[ 1] AEAD_State[ 1] | Message[ 1] Perm_row_0[1] 0 Tag[ 1]
; N[ 2] AEAD_State[ 2] | Message[ 2] Perm_row_0[2] 0 Tag[ 2]
; N[ 3] AEAD_State[ 3] | Message[ 3] Perm_row_0[3] 0 Tag[ 3]
; N[ 4] AEAD_State[ 4] | Message[ 4] 0x01 Perm_row_0[4] 0 Tag[ 4]
; N[ 5] AEAD_State[ 5] | Message[ 5] 0x00 Perm_row_0[5] 0 Tag[ 5]
; N[ 6] AEAD_State[ 6] | Message[ 6] 0x00 Perm_row_0[6] 0 Tag[ 6]
; N[ 7] AEAD_State[ 7] | Message[ 7] 0x00 Perm_row_0[7] <<< 0 Tag[ 7]
; N[ 8] AEAD_State[ 8] | Perm_row_1[0] 1
; N[ 9] AEAD_State[ 9] | Perm_row_1[1] 1
; N[10] AEAD_State[10] | Perm_row_1[2] 1
; N[11] AEAD_State[11] | Perm_row_1[3] 1
; N[12] AEAD_State[12] | Perm_row_1[4] 1
; N[13] AEAD_State[13] | Perm_row_1[5] 1
; N[14] AEAD_State[14] | Perm_row_1[6] 1
; N[15] AEAD_State[15] | Perm_row_1[7] <<< 1
; K[ 0] AEAD_State[16] | Perm_row_2[0] 8
; K[ 1] AEAD_State[17] | Perm_row_2[1] 8
; K[ 2] AEAD_State[18] | Perm_row_2[2] 8
; K[ 3] AEAD_State[19] | Perm_row_2[3] 8
; K[ 4] AEAD_State[20] | Perm_row_2[4] 8
; K[ 5] AEAD_State[21] | Perm_row_2[5] 8
; K[ 6] AEAD_State[22] | Perm_row_2[6] 8
; K[ 7] AEAD_State[23] | Perm_row_2[7] <<< 8
; K[ 8] AEAD_State[24] | Perm_row_3[0] 25
; K[ 9] AEAD_State[25] | Perm_row_3[1] 25
; K[10] AEAD_State[26] | Perm_row_3[2] 25
; K[11] AEAD_State[27] | Perm_row_3[3] 25
; K[12] AEAD_State[28] | Perm_row_3[4] 25
; K[13] AEAD_State[29] | Perm_row_3[5] 25
; K[14] AEAD_State[30] | Perm_row_3[6] 25
; K[15] AEAD_State[31] | ^0x80 Perm_row_3[7] <<< 25
;
;
; KNOT-AEAD(128, 384, 192):
; Initialization
; N[ 0] AEAD_State[ 0] | Message[ 0] Perm_row_0[ 0] 0 Tag[ 0]
; N[ 1] AEAD_State[ 1] | Message[ 1] Perm_row_0[ 1] 0 Tag[ 1]
; N[ 2] AEAD_State[ 2] | Message[ 2] Perm_row_0[ 2] 0 Tag[ 2]
; N[ 3] AEAD_State[ 3] | Message[ 3] Perm_row_0[ 3] 0 Tag[ 3]
; N[ 4] AEAD_State[ 4] | Message[ 4] 0x01 Perm_row_0[ 4] 0 Tag[ 4]
; N[ 5] AEAD_State[ 5] | Message[ 5] 0x00 Perm_row_0[ 5] 0 Tag[ 5]
; N[ 6] AEAD_State[ 6] | Message[ 6] 0x00 Perm_row_0[ 6] 0 Tag[ 6]
; N[ 7] AEAD_State[ 7] | Message[ 7] 0x00 Perm_row_0[ 7] 0 Tag[ 7]
; N[ 8] AEAD_State[ 8] | Message[ 8] 0x00 Perm_row_0[ 8] 0 Tag[ 8]
; N[ 9] AEAD_State[ 9] | Message[ 9] 0x00 Perm_row_0[ 9] 0 Tag[ 9]
; N[10] AEAD_State[10] | Message[10] 0x00 Perm_row_0[10] 0 Tag[10]
; N[11] AEAD_State[11] | Message[11] 0x00 Perm_row_0[11] <<< 0 Tag[11]
; N[12] AEAD_State[12] | Message[12] 0x00 Perm_row_1[ 0] 1 Tag[12]
; N[13] AEAD_State[13] | Message[13] 0x00 Perm_row_1[ 1] 1 Tag[13]
; N[14] AEAD_State[14] | Message[14] 0x00 Perm_row_1[ 2] 1 Tag[14]
; N[15] AEAD_State[15] | Message[15] 0x00 Perm_row_1[ 3] 1 Tag[15]
; K[ 0] AEAD_State[16] | Message[16] 0x00 Perm_row_1[ 4] 1
; K[ 1] AEAD_State[17] | Message[17] 0x00 Perm_row_1[ 5] 1
; K[ 2] AEAD_State[18] | Message[18] 0x00 Perm_row_1[ 6] 1
; K[ 3] AEAD_State[19] | Message[19] 0x00 Perm_row_1[ 7] 1
; K[ 4] AEAD_State[20] | Message[20] 0x00 Perm_row_1[ 8] 1
; K[ 5] AEAD_State[21] | Message[21] 0x00 Perm_row_1[ 9] 1
; K[ 6] AEAD_State[22] | Message[22] 0x00 Perm_row_1[10] 1
; K[ 7] AEAD_State[23] | Message[23] 0x00 Perm_row_1[11] <<< 1
; K[ 8] AEAD_State[24] | Perm_row_2[ 0] 8
; K[ 9] AEAD_State[25] | Perm_row_2[ 1] 8
; K[10] AEAD_State[26] | Perm_row_2[ 2] 8
; K[11] AEAD_State[27] | Perm_row_2[ 3] 8
; K[12] AEAD_State[28] | Perm_row_2[ 4] 8
; K[13] AEAD_State[29] | Perm_row_2[ 5] 8
; K[14] AEAD_State[30] | Perm_row_2[ 6] 8
; K[15] AEAD_State[31] | Perm_row_2[ 7] 8
; 0x00 AEAD_State[32] | Perm_row_2[ 8] 8
; 0x00 AEAD_State[33] | Perm_row_2[ 9] 8
; 0x00 AEAD_State[34] | Perm_row_2[10] 8
; 0x00 AEAD_State[35] | Perm_row_2[11] <<< 8
; 0x00 AEAD_State[36] | Perm_row_3[ 0] 55
; 0x00 AEAD_State[37] | Perm_row_3[ 1] 55
; 0x00 AEAD_State[38] | Perm_row_3[ 2] 55
; 0x00 AEAD_State[39] | Perm_row_3[ 3] 55
; 0x00 AEAD_State[40] | Perm_row_3[ 4] 55
; 0x00 AEAD_State[41] | Perm_row_3[ 5] 55
; 0x00 AEAD_State[42] | Perm_row_3[ 6] 55
; 0x00 AEAD_State[43] | Perm_row_3[ 7] 55
; 0x00 AEAD_State[44] | Perm_row_3[ 8] 55
; 0x00 AEAD_State[45] | Perm_row_3[ 9] 55
; 0x00 AEAD_State[46] | Perm_row_3[10] 55
; 0x00 ^0x80 AEAD_State[47] | ^0x80 Perm_row_3[11] <<< 55
#include <avr/io.h>
#include <avr/sfr_defs.h>
#include <stdlib.h>
#include <string.h>
#include "api.h"
#include "crypto_hash.h"
extern void crypto_hash_asm(
unsigned char *out,
const unsigned char *in,
unsigned char inlen
);
int crypto_hash(
unsigned char *out,
const unsigned char *in,
unsigned long long inlen
)
{
/*
...
... the code for the hash function implementation goes here
... generating a hash value out[0],out[1],...,out[CRYPTO_BYTES-1]
... from a message in[0],in[1],...,in[inlen-1]
...
... return 0;
*/
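/* Note: crypto_hash_asm takes an 8-bit length, so only the low byte of
   inlen reaches the assembly routine; this is sufficient for the short
   messages this build is configured for. */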
crypto_hash_asm(out, in, inlen);
return 0;
}
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
#define x10 r0
#define x11 r1
#define x12 r2
#define x13 r3
#define x14 r4
#define x15 r5
#define x16 r6
#define x17 r7
; an intentional arrangement of registers to facilitate movw
#define x20 r8
#define x21 r10
#define x22 r12
#define x23 r14
#define x24 r9
#define x25 r11
#define x26 r13
#define x27 r15
; an intentional arrangement of registers to facilitate movw
#define x30 r16
#define x35 r18
#define x32 r20
#define x37 r22
#define x34 r17
#define x31 r19
#define x36 r21
#define x33 r23
#define t0j r24
#define t1j r25
#define x0j r27
#include "assist.h"
.macro Sbox i0, i1, i2, i3
mov t0j, \i1
com \i0
and \i1, \i0
eor \i1, \i2
or \i2, t0j
eor \i0, \i3
eor \i2, \i0
eor t0j, \i3
and \i0, \i1
eor \i3, \i1
eor \i0, t0j
and t0j, \i2
eor \i1, t0j
.endm
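; Sbox applies the bit-sliced KNOT 4-bit S-box to one byte from each of the
; four state rows; i0..i3 are updated in place and t0j is used as a temporary.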
Permutation:
PUSH_CONFLICT
mov rcnt, rn
ldi YH, hi8(SRAM_STATE + ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + ROW_INBYTES)
ld x10, Y+
ld x11, Y+
ld x12, Y+
ld x13, Y+
ld x14, Y+
ld x15, Y+
ld x16, Y+
ld x17, Y+
ld x20, Y+
ld x21, Y+
ld x22, Y+
ld x23, Y+
ld x24, Y+
ld x25, Y+
ld x26, Y+
ld x27, Y+
ld x30, Y+
ld x31, Y+
ld x32, Y+
ld x33, Y+
ld x34, Y+
ld x35, Y+
ld x36, Y+
ld x37, Y+
#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[2] = 1 for HASH
rjmp For_Hash
For_AEAD:
ldi ZL, lo8(RC_LFSR6)
ldi ZH, hi8(RC_LFSR6)
rjmp round_loop_start
For_Hash:
ldi ZL, lo8(RC_LFSR7)
ldi ZH, hi8(RC_LFSR7)
#elif defined(CRYPTO_AEAD)
ldi ZL, lo8(RC_LFSR6)
ldi ZH, hi8(RC_LFSR6)
#else
ldi ZL, lo8(RC_LFSR7)
ldi ZH, hi8(RC_LFSR7)
#endif
round_loop_start:
; AddRC
lpm t0j, Z+
ldi YH, hi8(SRAM_STATE)
ldi YL, lo8(SRAM_STATE)
ld x0j, Y
eor x0j, t0j
; SubColumns
Sbox x0j, x10, x20, x30
st Y+, x0j
ld x0j, Y
Sbox x0j, x11, x21, x31
st Y+, x0j
ld x0j, Y
Sbox x0j, x12, x22, x32
st Y+, x0j
ld x0j, Y
Sbox x0j, x13, x23, x33
st Y+, x0j
ld x0j, Y
Sbox x0j, x14, x24, x34
st Y+, x0j
ld x0j, Y
Sbox x0j, x15, x25, x35
st Y+, x0j
ld x0j, Y
Sbox x0j, x16, x26, x36
st Y+, x0j
ld x0j, Y
Sbox x0j, x17, x27, x37
st Y, x0j
; ShiftRows
; <<< 1
mov t0j, x17
rol t0j
rol x10
rol x11
rol x12
rol x13
rol x14
rol x15
rol x16
rol x17
; <<< 8
; 7 6 5 4 3 2 1 0 => 6 5 4 3 2 1 0 7
;mov t0j, x27
;mov x27, x26
;mov x26, x25
;mov x25, x24
;mov x24, x23
;mov x23, x22
;mov x22, x21
;mov x21, x20
;mov x20, t0j
; an intentional arrangement of registers to facilitate movw
movw t0j, x23 ; t1j:t0j <= x27:x23
movw x23, x22 ; x27:x23 <= x26:x22
movw x22, x21 ; x26:x22 <= x25:x21
movw x21, x20 ; x25:x21 <= x24:x20
mov x20, t1j ; x20 <= t1j
mov x24, t0j ; x24 <= t0j
; <<< 1
mov t0j, x37
rol t0j
rol x30
rol x31
rol x32
rol x33
rol x34
rol x35
rol x36
rol x37
; <<< 24
; 7 6 5 4 3 2 1 0 => 4 3 2 1 0 7 6 5
;mov t0j, x30
;mov x30, x35
;mov x35, x32
;mov x32, x37
;mov x37, x34
;mov x34, x31
;mov x31, x36
;mov x36, x33
;mov x33, t0j
; an intentional arrangement of registers to facilitate movw
;x30 r16
;x35 r18
;x32 r20
;x37 r22
;x34 r17
;x31 r19
;x36 r21
;x33 r23
movw t0j, x30 ; t1j:t0j <= x34:x30
movw x30, x35 ; x34:x30 <= x31:x35
movw x35, x32 ; x31:x35 <= x36:x32
movw x32, x37 ; x36:x32 <= x33:x37
mov x37, t1j ; x37 <= x34
mov x33, t0j ; x33 <= x30
dec rcnt
breq round_loop_end
jmp round_loop_start
round_loop_end:
ldi YH, hi8(SRAM_STATE + ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + ROW_INBYTES)
st Y+, x10
st Y+, x11
st Y+, x12
st Y+, x13
st Y+, x14
st Y+, x15
st Y+, x16
st Y+, x17
st Y+, x20
st Y+, x21
st Y+, x22
st Y+, x23
st Y+, x24
st Y+, x25
st Y+, x26
st Y+, x27
st Y+, x30
st Y+, x31
st Y+, x32
st Y+, x33
st Y+, x34
st Y+, x35
st Y+, x36
st Y+, x37
POP_CONFLICT
ret
.section .text
#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
RC_LFSR6:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x21, 0x03, 0x06
.byte 0x0c, 0x18, 0x31, 0x22, 0x05, 0x0a, 0x14, 0x29
.byte 0x13, 0x27, 0x0f, 0x1e, 0x3d, 0x3a, 0x34, 0x28
.byte 0x11, 0x23, 0x07, 0x0e, 0x1c, 0x39, 0x32, 0x24
.byte 0x09, 0x12, 0x25, 0x0b, 0x16, 0x2d, 0x1b, 0x37
.byte 0x2e, 0x1d, 0x3b, 0x36, 0x2c, 0x19, 0x33, 0x26
.byte 0x0d, 0x1a, 0x35, 0x2a, 0x15, 0x2b, 0x17, 0x2f
.byte 0x1f, 0x3f, 0x3e, 0x3c, 0x38, 0x30, 0x20, 0x00
RC_LFSR7:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03
.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a
.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c
.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b
.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a
.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f
.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43
.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09
.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36
.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37
.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31
.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25
.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c
.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a
.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f
.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00
#elif defined(CRYPTO_AEAD)
RC_LFSR6:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x21, 0x03, 0x06
.byte 0x0c, 0x18, 0x31, 0x22, 0x05, 0x0a, 0x14, 0x29
.byte 0x13, 0x27, 0x0f, 0x1e, 0x3d, 0x3a, 0x34, 0x28
.byte 0x11, 0x23, 0x07, 0x0e, 0x1c, 0x39, 0x32, 0x24
.byte 0x09, 0x12, 0x25, 0x0b, 0x16, 0x2d, 0x1b, 0x37
.byte 0x2e, 0x1d, 0x3b, 0x36, 0x2c, 0x19, 0x33, 0x26
.byte 0x0d, 0x1a, 0x35, 0x2a, 0x15, 0x2b, 0x17, 0x2f
.byte 0x1f, 0x3f, 0x3e, 0x3c, 0x38, 0x30, 0x20, 0x00
#else
RC_LFSR7:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03
.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a
.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c
.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b
.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a
.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f
.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43
.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09
.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36
.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37
.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31
.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25
.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c
.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a
.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f
.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00
#endif
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
; an intentional arrangement of registers to facilitate movw
#define x20 r0
#define x21 r2
#define x22 r4
#define x23 r6
#define x24 r8
#define x25 r10
#define x26 r1
#define x27 r3
#define x28 r5
#define x29 r7
#define x2a r9
#define x2b r11
; an intentional arrangement of registers to facilitate movw
#define x30 r22
#define x35 r20
#define x3a r18
#define x33 r16
#define x38 r14
#define x31 r12
#define x36 r23
#define x3b r21
#define x34 r19
#define x39 r17
#define x32 r15
#define x37 r13
#define t0j r24
#define t1j r25
#define x0j r25
#define x1j r27
#include "assist.h"
.macro Sbox i0, i1, i2, i3
ldi t0j, 0xFF
eor \i0, t0j
mov t0j, \i1
and \i1, \i0
eor \i1, \i2
or \i2, t0j
eor \i0, \i3
eor \i2, \i0
eor t0j, \i3
and \i0, \i1
eor \i3, \i1
eor \i0, t0j
and t0j, \i2
eor \i1, t0j
.endm
.macro OneColumn i0, i1, i2, i3
ld \i0, Y
ldd \i1, Y + ROW_INBYTES
Sbox \i0, \i1, \i2, \i3
st Y+, \i0
rol \i1 ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES -1, \i1
.endm
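; OneColumn handles one state column: it loads the row-0 and row-1 bytes from
; SRAM, applies the S-box together with the in-register row-2 and row-3 bytes,
; stores the row-0 result, and writes back the row-1 byte rotated left through
; carry (its share of the ShiftRows row-1 <<< 1 rotation).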
Permutation:
PUSH_CONFLICT
mov rcnt, rn
ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES)
ld x20, Y+
ld x21, Y+
ld x22, Y+
ld x23, Y+
ld x24, Y+
ld x25, Y+
ld x26, Y+
ld x27, Y+
ld x28, Y+
ld x29, Y+
ld x2a, Y+
ld x2b, Y+
ld x30, Y+
ld x31, Y+
ld x32, Y+
ld x33, Y+
ld x34, Y+
ld x35, Y+
ld x36, Y+
ld x37, Y+
ld x38, Y+
ld x39, Y+
ld x3a, Y+
ld x3b, Y+
ldi ZL, lo8(RC_LFSR7)
ldi ZH, hi8(RC_LFSR7)
round_loop_start:
; AddRC
lpm t0j, Z+
ldi YH, hi8(SRAM_STATE)
ldi YL, lo8(SRAM_STATE)
ld x0j, Y
eor x0j, t0j
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, x20, x30
st Y+, x0j
lsl x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES -1, x1j
OneColumn x0j, x1j, x21, x31
OneColumn x0j, x1j, x22, x32
OneColumn x0j, x1j, x23, x33
OneColumn x0j, x1j, x24, x34
OneColumn x0j, x1j, x25, x35
OneColumn x0j, x1j, x26, x36
OneColumn x0j, x1j, x27, x37
OneColumn x0j, x1j, x28, x38
OneColumn x0j, x1j, x29, x39
OneColumn x0j, x1j, x2a, x3a
OneColumn x0j, x1j, x2b, x3b
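; complete the Row-1 <<< 1 rotation: fold the carry left over from the last
; rol into bit 0 of the first byte of row 1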
ld x1j, Y
eor t0j, t0j
adc x1j, t0j
st Y, x1j
; b a 9 8 7 6 5 4 3 2 1 0
; -- -- -- -- -- -- -- -- -- -- -- x- 0
; -- -- -- -- -- -- -- -- -- -- -- x' 0
; -- -- -- -- -- -- -- -- -- -- x- -- 1
; -- -- -- -- x' -- -- -- -- -- -- -- 7
; 4 3 2 1 0 b a 9 8 7 6 5
; ShiftRows -- the last two rows
; <<< 8
; b a 9 8 7 6 5 4 3 2 1 0 => a 9 8 7 6 5 4 3 2 1 0 b
movw t0j, x25 ; t1j:t0j <= x2b:x25
movw x25, x24 ; x2b:x25 <= x2a:x24
movw x24, x23 ; x2a:x24 <= x29:x23
movw x23, x22 ; x29:x23 <= x28:x22
movw x22, x21 ; x28:x22 <= x27:x21
movw x21, x20 ; x27:x21 <= x26:x20
mov x26, t0j ; x26 <= x25
mov x20, t1j ; x20 <= x2b
; >>> 1
mov t0j, x3b
ror t0j
ror x3a
ror x39
ror x38
ror x37
ror x36
ror x35
ror x34
ror x33
ror x32
ror x31
ror x30
ror x3b
; <<< 56
; b a 9 8 7 6 5 4 3 2 1 0 => 4 3 2 1 0 b a 9 8 7 6 5
; mov x3j, x30
; mov x30, x35
; mov x35, x3a
; mov x3a, x33
; mov x33, x38
; mov x38, x31
; mov x31, x36
; mov x36, x3b
; mov x3b, x34
; mov x34, x39
; mov x39, x32
; mov x32, x37
; mov x37, x3j
; an intentional arrangement of registers to facilitate movw
; x30 r22
; x35 r20
; x3a r18
; x33 r16
; x38 r14
; x31 r12
; x36 r23
; x3b r21
; x34 r19
; x39 r17
; x32 r15
; x37 r13
movw t0j, x30 ; t1j:t0j <= x36:x30
movw x30, x35 ; x36:x30 <= x3b:x35
movw x35, x3a ; x3b:x35 <= x34:x3a
movw x3a, x33 ; x34:x3a <= x39:x33
movw x33, x38 ; x39:x33 <= x32:x38
movw x38, x31 ; x32:x38 <= x37:x31
mov x31, t1j ; x31 <= x36
mov x37, t0j ; x37 <= x30
dec rcnt
breq round_loop_end
jmp round_loop_start
round_loop_end:
ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES)
st Y+, x20
st Y+, x21
st Y+, x22
st Y+, x23
st Y+, x24
st Y+, x25
st Y+, x26
st Y+, x27
st Y+, x28
st Y+, x29
st Y+, x2a
st Y+, x2b
st Y+, x30
st Y+, x31
st Y+, x32
st Y+, x33
st Y+, x34
st Y+, x35
st Y+, x36
st Y+, x37
st Y+, x38
st Y+, x39
st Y+, x3a
st Y+, x3b
POP_CONFLICT
ret
RC_LFSR7:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03
.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a
.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c
.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b
.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a
.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f
.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43
.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09
.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36
.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37
.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31
.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25
.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c
.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a
.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f
.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
#define x20 r0
#define x22 r2
#define x24 r4
#define x26 r6
#define x28 r1
#define x2a r3
#define x2c r5
#define x2e r7
#define x30 r8
#define x3d r10
#define x3a r12
#define x37 r14
#define x34 r16
#define x31 r18
#define x3e r20
#define x3b r22
#define x38 r9
#define x35 r11
#define x32 r13
#define x3f r15
#define x3c r17
#define x39 r19
#define x36 r21
#define x33 r23
#define t0j r24
#define t1j r25
#define x0j r25
#define x1j r27
#define x2j r26
#include "assist.h"
.macro Sbox i0, i1, i2, i3
ldi t0j, 0xFF
eor \i0, t0j
mov t0j, \i1
and \i1, \i0
eor \i1, \i2
or \i2, t0j
eor \i0, \i3
eor \i2, \i0
eor t0j, \i3
and \i0, \i1
eor \i3, \i1
eor \i0, t0j
and t0j, \i2
eor \i1, t0j
.endm
.macro TwoColumns i2_e, i3_e, i3_o
; column 2i
ld x0j, Y
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, \i2_e, \i3_e
st Y+, x0j
rol x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES - 1, x1j
; column 2i+1
ld x0j, Y
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, x2j, \i3_o
st Y+, x0j
rol x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES - 1, x1j
ldd t0j, Y + 2 * ROW_INBYTES + 1
std Y + 2 * ROW_INBYTES + 1, x2j
mov x2j, t0j
.endm
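; TwoColumns processes columns 2i and 2i+1: the even-indexed row-2 bytes stay
; in registers, while the odd-indexed row-2 bytes are streamed through x2j;
; the updated odd byte is stored two positions ahead in SRAM, which also
; places it at its post-ShiftRows (row-2 rotation) position.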
Permutation:
PUSH_CONFLICT
mov rcnt, rn
push rcnt
ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES)
ldd x20, Y + 0x00
ldd x22, Y + 0x02
ldd x24, Y + 0x04
ldd x26, Y + 0x06
ldd x28, Y + 0x08
ldd x2a, Y + 0x0a
ldd x2c, Y + 0x0c
ldd x2e, Y + 0x0e
adiw YL, ROW_INBYTES
ld x30, Y+
ld x31, Y+
ld x32, Y+
ld x33, Y+
ld x34, Y+
ld x35, Y+
ld x36, Y+
ld x37, Y+
ld x38, Y+
ld x39, Y+
ld x3a, Y+
ld x3b, Y+
ld x3c, Y+
ld x3d, Y+
ld x3e, Y+
ld x3f, Y+
#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[2] = 1 for HASH
rjmp For_Hash
For_AEAD:
ldi ZL, lo8(RC_LFSR7)
ldi ZH, hi8(RC_LFSR7)
rjmp round_loop_start
For_Hash:
ldi ZL, lo8(RC_LFSR8)
ldi ZH, hi8(RC_LFSR8)
#elif defined(CRYPTO_AEAD)
ldi ZL, lo8(RC_LFSR7)
ldi ZH, hi8(RC_LFSR7)
#else
ldi ZL, lo8(RC_LFSR8)
ldi ZH, hi8(RC_LFSR8)
#endif
round_loop_start:
; AddRC
lpm t0j, Z+
ldi YH, hi8(SRAM_STATE)
ldi YL, lo8(SRAM_STATE)
; column 0
ld x0j, Y
eor x0j, t0j
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, x20, x30
st Y+, x0j
lsl x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES - 1, x1j
; column 1
ld x0j, Y
ldd x1j, Y + ROW_INBYTES
ldd x2j, Y + 2 * ROW_INBYTES
Sbox x0j, x1j, x2j, x31
st Y+, x0j
rol x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES - 1, x1j
ldd t0j, Y + 2 * ROW_INBYTES + 1
std Y + 2 * ROW_INBYTES + 1, x2j
mov x2j, t0j
; column 2, 3
TwoColumns x22, x32, x33
; column 4, 5
TwoColumns x24, x34, x35
; column 6, 7
TwoColumns x26, x36, x37
; column 8, 9
TwoColumns x28, x38, x39
; column 10, 11
TwoColumns x2a, x3a, x3b
; column 12, 13
TwoColumns x2c, x3c, x3d
; column 14
ld x0j, Y
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, x2e, x3e
st Y+, x0j
rol x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES - 1, x1j
; column 15
ld x0j, Y
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, x2j, x3f
st Y+, x0j
rol x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES - 1, x1j
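; fold the Row-1 rotation carry into bit 0 of the first byte of row 1, and
; store the last odd row-2 byte at its rotated position (row 2, byte 1)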
ld x1j, Y
eor t0j, t0j
adc x1j, t0j
st Y, x1j
std Y + ROW_INBYTES + 1, x2j
; f e d c b a 9 8 7 6 5 4 3 2 1 0
; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x- 0
; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x' 0
; -- -- -- -- -- -- -- -- -- -- -- -- -- x- -- -- 2
; -- -- -- -- -- -- -- -- -- -- -- -- x' -- -- -- 3
; c b a 9 8 7 6 5 4 3 2 1 0 f e d
; x2e x2c x2a x28 x26 x24 x22 x20 => x2c x2a x28 x26 x24 x22 x20 x2e
;mov t0j, x2e
;mov x2e, x2c
;mov x2c, x2a
;mov x2a, x28
;mov x28, x26
;mov x26, x24
;mov x24, x22
;mov x22, x20
;mov x20, t0j
; an intentional arrangement of registers to facilitate movw
movw t0j, x26 ; t1j:t0j <= x2e:x26
movw x26, x24 ; x2e:x26 <= x2c:x24
movw x24, x22 ; x2c:x24 <= x2a:x22
movw x22, x20 ; x2a:x22 <= x28:x20
mov x20, t1j ; x20 <= t1j
mov x28, t0j ; x28 <= t0j
; <<< 1
mov t0j, x3f
rol t0j
rol x30
rol x31
rol x32
rol x33
rol x34
rol x35
rol x36
rol x37
rol x38
rol x39
rol x3a
rol x3b
rol x3c
rol x3d
rol x3e
rol x3f
; <<< 24
; f e d c b a 9 8 7 6 5 4 3 2 1 0 =>
; c b a 9 8 7 6 5 4 3 2 1 0 f e d
; mov x3j, x30
; mov x30, x3d
; mov x3d, x3a
; mov x3a, x37
; mov x37, x34
; mov x34, x31
; mov x31, x3e
; mov x3e, x3b
; mov x3b, x38
; mov x38, x35
; mov x35, x32
; mov x32, x3f
; mov x3f, x3c
; mov x3c, x39
; mov x39, x36
; mov x36, x33
; mov x33, x3j
; an intentional arrangement of registers to facilitate movw
; x30 r8
; x3d r10
; x3a r12
; x37 r14
; x34 r16
; x31 r18
; x3e r20
; x3b r22
; x38 r9
; x35 r11
; x32 r13
; x3f r15
; x3c r17
; x39 r19
; x36 r21
; x33 r23
movw t0j, x30 ; t1j:t0j <= x38:x30
movw x30, x3d ; x38:x30 <= x35:x3d
movw x3d, x3a ; x35:x3d <= x32:x3a
movw x3a, x37 ; x32:x3a <= x3f:x37
movw x37, x34 ; x3f:x37 <= x3c:x34
movw x34, x31 ; x3c:x34 <= x39:x31
movw x31, x3e ; x39:x31 <= x36:x3e
movw x3e, x3b ; x36:x3e <= x33:x3b
mov x3b, t1j ; x3b <= x38
mov x33, t0j ; x33 <= x30
pop rcnt
dec rcnt
push rcnt
breq round_loop_end
rjmp round_loop_start
round_loop_end:
pop rcnt
ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES)
std Y + 0x00, x20
std Y + 0x02, x22
std Y + 0x04, x24
std Y + 0x06, x26
std Y + 0x08, x28
std Y + 0x0a, x2a
std Y + 0x0c, x2c
std Y + 0x0e, x2e
adiw YL, ROW_INBYTES
st Y+, x30
st Y+, x31
st Y+, x32
st Y+, x33
st Y+, x34
st Y+, x35
st Y+, x36
st Y+, x37
st Y+, x38
st Y+, x39
st Y+, x3a
st Y+, x3b
st Y+, x3c
st Y+, x3d
st Y+, x3e
st Y+, x3f
POP_CONFLICT
ret
.section .text
#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
RC_LFSR7:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03
.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a
.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c
.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b
.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a
.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f
.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43
.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09
.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36
.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37
.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31
.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25
.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c
.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a
.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f
.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00
RC_LFSR8:
.byte 0x01, 0x02, 0x04, 0x08, 0x11, 0x23, 0x47, 0x8e
.byte 0x1c, 0x38, 0x71, 0xe2, 0xc4, 0x89, 0x12, 0x25
.byte 0x4b, 0x97, 0x2e, 0x5c, 0xb8, 0x70, 0xe0, 0xc0
.byte 0x81, 0x03, 0x06, 0x0c, 0x19, 0x32, 0x64, 0xc9
.byte 0x92, 0x24, 0x49, 0x93, 0x26, 0x4d, 0x9b, 0x37
.byte 0x6e, 0xdc, 0xb9, 0x72, 0xe4, 0xc8, 0x90, 0x20
.byte 0x41, 0x82, 0x05, 0x0a, 0x15, 0x2b, 0x56, 0xad
.byte 0x5b, 0xb6, 0x6d, 0xda, 0xb5, 0x6b, 0xd6, 0xac
.byte 0x59, 0xb2, 0x65, 0xcb, 0x96, 0x2c, 0x58, 0xb0
.byte 0x61, 0xc3, 0x87, 0x0f, 0x1f, 0x3e, 0x7d, 0xfb
.byte 0xf6, 0xed, 0xdb, 0xb7, 0x6f, 0xde, 0xbd, 0x7a
.byte 0xf5, 0xeb, 0xd7, 0xae, 0x5d, 0xba, 0x74, 0xe8
.byte 0xd1, 0xa2, 0x44, 0x88, 0x10, 0x21, 0x43, 0x86
.byte 0x0d, 0x1b, 0x36, 0x6c, 0xd8, 0xb1, 0x63, 0xc7
.byte 0x8f, 0x1e, 0x3c, 0x79, 0xf3, 0xe7, 0xce, 0x9c
.byte 0x39, 0x73, 0xe6, 0xcc, 0x98, 0x31, 0x62, 0xc5
.byte 0x8b, 0x16, 0x2d, 0x5a, 0xb4, 0x69, 0xd2, 0xa4
.byte 0x48, 0x91, 0x22, 0x45, 0x8a, 0x14, 0x29, 0x52
.byte 0xa5, 0x4a, 0x95, 0x2a, 0x54, 0xa9, 0x53, 0xa7
.byte 0x4e, 0x9d, 0x3b, 0x77, 0xee, 0xdd, 0xbb, 0x76
.byte 0xec, 0xd9, 0xb3, 0x67, 0xcf, 0x9e, 0x3d, 0x7b
.byte 0xf7, 0xef, 0xdf, 0xbf, 0x7e, 0xfd, 0xfa, 0xf4
.byte 0xe9, 0xd3, 0xa6, 0x4c, 0x99, 0x33, 0x66, 0xcd
.byte 0x9a, 0x35, 0x6a, 0xd4, 0xa8, 0x51, 0xa3, 0x46
.byte 0x8c, 0x18, 0x30, 0x60, 0xc1, 0x83, 0x07, 0x0e
.byte 0x1d, 0x3a, 0x75, 0xea, 0xd5, 0xaa, 0x55, 0xab
.byte 0x57, 0xaf, 0x5f, 0xbe, 0x7c, 0xf9, 0xf2, 0xe5
.byte 0xca, 0x94, 0x28, 0x50, 0xa1, 0x42, 0x84, 0x09
.byte 0x13, 0x27, 0x4f, 0x9f, 0x3f, 0x7f, 0xff, 0xfe
.byte 0xfc, 0xf8, 0xf0, 0xe1, 0xc2, 0x85, 0x0b, 0x17
.byte 0x2f, 0x5e, 0xbc, 0x78, 0xf1, 0xe3, 0xc6, 0x8d
.byte 0x1a, 0x34, 0x68, 0xd0, 0xa0, 0x40, 0x80, 0x00
#elif defined(CRYPTO_AEAD)
RC_LFSR7:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03
.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a
.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c
.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b
.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a
.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f
.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43
.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09
.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36
.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37
.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31
.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25
.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c
.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a
.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f
.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00
#else
RC_LFSR8:
.byte 0x01, 0x02, 0x04, 0x08, 0x11, 0x23, 0x47, 0x8e
.byte 0x1c, 0x38, 0x71, 0xe2, 0xc4, 0x89, 0x12, 0x25
.byte 0x4b, 0x97, 0x2e, 0x5c, 0xb8, 0x70, 0xe0, 0xc0
.byte 0x81, 0x03, 0x06, 0x0c, 0x19, 0x32, 0x64, 0xc9
.byte 0x92, 0x24, 0x49, 0x93, 0x26, 0x4d, 0x9b, 0x37
.byte 0x6e, 0xdc, 0xb9, 0x72, 0xe4, 0xc8, 0x90, 0x20
.byte 0x41, 0x82, 0x05, 0x0a, 0x15, 0x2b, 0x56, 0xad
.byte 0x5b, 0xb6, 0x6d, 0xda, 0xb5, 0x6b, 0xd6, 0xac
.byte 0x59, 0xb2, 0x65, 0xcb, 0x96, 0x2c, 0x58, 0xb0
.byte 0x61, 0xc3, 0x87, 0x0f, 0x1f, 0x3e, 0x7d, 0xfb
.byte 0xf6, 0xed, 0xdb, 0xb7, 0x6f, 0xde, 0xbd, 0x7a
.byte 0xf5, 0xeb, 0xd7, 0xae, 0x5d, 0xba, 0x74, 0xe8
.byte 0xd1, 0xa2, 0x44, 0x88, 0x10, 0x21, 0x43, 0x86
.byte 0x0d, 0x1b, 0x36, 0x6c, 0xd8, 0xb1, 0x63, 0xc7
.byte 0x8f, 0x1e, 0x3c, 0x79, 0xf3, 0xe7, 0xce, 0x9c
.byte 0x39, 0x73, 0xe6, 0xcc, 0x98, 0x31, 0x62, 0xc5
.byte 0x8b, 0x16, 0x2d, 0x5a, 0xb4, 0x69, 0xd2, 0xa4
.byte 0x48, 0x91, 0x22, 0x45, 0x8a, 0x14, 0x29, 0x52
.byte 0xa5, 0x4a, 0x95, 0x2a, 0x54, 0xa9, 0x53, 0xa7
.byte 0x4e, 0x9d, 0x3b, 0x77, 0xee, 0xdd, 0xbb, 0x76
.byte 0xec, 0xd9, 0xb3, 0x67, 0xcf, 0x9e, 0x3d, 0x7b
.byte 0xf7, 0xef, 0xdf, 0xbf, 0x7e, 0xfd, 0xfa, 0xf4
.byte 0xe9, 0xd3, 0xa6, 0x4c, 0x99, 0x33, 0x66, 0xcd
.byte 0x9a, 0x35, 0x6a, 0xd4, 0xa8, 0x51, 0xa3, 0x46
.byte 0x8c, 0x18, 0x30, 0x60, 0xc1, 0x83, 0x07, 0x0e
.byte 0x1d, 0x3a, 0x75, 0xea, 0xd5, 0xaa, 0x55, 0xab
.byte 0x57, 0xaf, 0x5f, 0xbe, 0x7c, 0xf9, 0xf2, 0xe5
.byte 0xca, 0x94, 0x28, 0x50, 0xa1, 0x42, 0x84, 0x09
.byte 0x13, 0x27, 0x4f, 0x9f, 0x3f, 0x7f, 0xff, 0xfe
.byte 0xfc, 0xf8, 0xf0, 0xe1, 0xc2, 0x85, 0x0b, 0x17
.byte 0x2f, 0x5e, 0xbc, 0x78, 0xf1, 0xe3, 0xc6, 0x8d
.byte 0x1a, 0x34, 0x68, 0xd0, 0xa0, 0x40, 0x80, 0x00
#endif
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
;
; ============================================
; R E G I S T E R D E F I N I T I O N S
; ============================================
;
#define mclen r16
#define radlen r17
#define tcnt r17
#define tmp0 r20
#define tmp1 r21
#define cnt0 r22
#define rn r23
#define rate r24
; AEDH = 0b000: for authenticate AD
; AEDH = 0b001: for encryption
; AEDH = 0b011: for decryption
; AEDH = 0b100: for hash
#define AEDH r25
#define rcnt r26
#if (STATE_INBITS==256)
#include "knot256.h"
#elif (STATE_INBITS==384)
#include "knot384.h"
#elif (STATE_INBITS==512)
#include "knot512.h"
#else
#error "Not specified key size and state size"
#endif
#define CRYPTO_BYTES 64
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
.macro PUSH_CONFLICT
push r16
push r17
push r18
push r19
push r23
push r24
push r25
push r26
push r27
push r28
push r29
push r30
push r31
.endm
.macro POP_CONFLICT
pop r31
pop r30
pop r29
pop r28
pop r27
pop r26
pop r25
pop r24
pop r23
pop r19
pop r18
pop r17
pop r16
.endm
.macro PUSH_ALL
push r2
push r3
push r4
push r5
push r6
push r7
push r8
push r9
push r10
push r11
push r12
push r13
push r14
push r15
push r16
push r17
push r28
push r29
.endm
.macro POP_ALL
pop r29
pop r28
pop r17
pop r16
pop r15
pop r14
pop r13
pop r12
pop r11
pop r10
pop r9
pop r8
pop r7
pop r6
pop r5
pop r4
pop r3
pop r2
clr r1
.endm
\ No newline at end of file
#ifndef __CONFIG_H__
#define __CONFIG_H__
//#define CRYPTO_AEAD
#define CRYPTO_HASH
#define MAX_MESSAGE_LENGTH 128
#define STATE_INBITS 512
/* For CRYPTO_AEAD */
#define CRYPTO_KEYBITS 256
/* For CRYPTO_HASH */
#define CRYPTO_BITS 512
#define STATE_INBYTES ((STATE_INBITS + 7) / 8)
#define ROW_INBITS ((STATE_INBITS + 3) / 4)
#define ROW_INBYTES ((ROW_INBITS + 7) / 8)
/* For CRYPTO_AEAD */
#define CRYPTO_KEYBYTES ((CRYPTO_KEYBITS + 7) / 8)
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES CRYPTO_KEYBYTES
#define CRYPTO_ABYTES CRYPTO_KEYBYTES
#define CRYPTO_NOOVERLAP 1
#define MAX_ASSOCIATED_DATA_LENGTH 32
#define MAX_CIPHER_LENGTH (MAX_MESSAGE_LENGTH + CRYPTO_ABYTES)
#define TAG_MATCH 0
#define TAG_UNMATCH -1
#define OTHER_FAILURES -2
/* For CRYPTO_HASH */
#define CRYPTO_BYTES ((CRYPTO_BITS + 7) / 8)
#define DOMAIN_BITS 0x80
#define PAD_BITS 0x01
#define S384_R192_BITS 0x80
#if (STATE_INBITS==256)
#define C1 1
#define C2 8
#define C3 25
#elif (STATE_INBITS==384)
#define C1 1
#define C2 8
#define C3 55
#elif (STATE_INBITS==512)
#define C1 1
#define C2 16
#define C3 25
#else
#error "Not specified state size"
#endif
#ifdef CRYPTO_AEAD
/* For CRYPTO_AEAD */
#define KEY_INBITS (CRYPTO_KEYBYTES * 8)
#define KEY_INBYTES (CRYPTO_KEYBYTES)
#define NONCE_INBITS (CRYPTO_NPUBBYTES * 8)
#define NONCE_INBYTES (CRYPTO_NPUBBYTES)
#define TAG_INBITS (CRYPTO_ABYTES * 8)
#define TAG_INBYTES (CRYPTO_ABYTES)
#if (KEY_INBITS==128) && (STATE_INBITS==256)
#define RATE_INBITS 64
#define NR_0 52
#define NR_i 28
#define NR_f 32
#elif (KEY_INBITS==128) && (STATE_INBITS==384)
#define RATE_INBITS 192
#define NR_0 76
#define NR_i 28
#define NR_f 32
#elif (KEY_INBITS==192) && (STATE_INBITS==384)
#define RATE_INBITS 96
#define NR_0 76
#define NR_i 40
#define NR_f 44
#elif (KEY_INBITS==256) && (STATE_INBITS==512)
#define RATE_INBITS 128
#define NR_0 100
#define NR_i 52
#define NR_f 56
#else
#error "Not specified key size and state size"
#endif
#define RATE_INBYTES ((RATE_INBITS + 7) / 8)
#define SQUEEZE_RATE_INBYTES TAG_INBYTES
#endif
#ifdef CRYPTO_HASH
/* For CRYPTO_HASH */
#define HASH_DIGEST_INBITS (CRYPTO_BYTES * 8)
#if (HASH_DIGEST_INBITS==256) && (STATE_INBITS==256)
#define HASH_RATE_INBITS 32
#define HASH_SQUEEZE_RATE_INBITS 128
#define NR_h 68
#elif (HASH_DIGEST_INBITS==256) && (STATE_INBITS==384)
#define HASH_RATE_INBITS 128
#define HASH_SQUEEZE_RATE_INBITS 128
#define NR_h 80
#elif (HASH_DIGEST_INBITS==384) && (STATE_INBITS==384)
#define HASH_RATE_INBITS 48
#define HASH_SQUEEZE_RATE_INBITS 192
#define NR_h 104
#elif (HASH_DIGEST_INBITS==512) && (STATE_INBITS==512)
#define HASH_RATE_INBITS 64
#define HASH_SQUEEZE_RATE_INBITS 256
#define NR_h 140
#else
#error "Not specified hash digest size and state size"
#endif
#define HASH_RATE_INBYTES ((HASH_RATE_INBITS + 7) / 8)
#define HASH_SQUEEZE_RATE_INBYTES ((HASH_SQUEEZE_RATE_INBITS + 7) / 8)
#endif
#define TAG_MATCH 0
#define TAG_UNMATCH -1
#define OTHER_FAILURES -2
#endif
\ No newline at end of file
#ifdef __cplusplus
extern "C" {
#endif
int crypto_hash(
unsigned char *out,
const unsigned char *in,
unsigned long long inlen
);
#ifdef __cplusplus
}
#endif
\ No newline at end of file
#include <avr/io.h>
#include <avr/sfr_defs.h>
#include <stdlib.h>
#include <string.h>
#include "config.h"
extern void crypto_aead_encrypt_asm(
unsigned char *c,
const unsigned char *m,
unsigned char mlen,
const unsigned char *ad,
unsigned char adlen,
const unsigned char *npub,
const unsigned char *k
);
extern int crypto_aead_decrypt_asm(
unsigned char *m,
const unsigned char *c,
unsigned char clen,
const unsigned char *ad,
unsigned char adlen,
const unsigned char *npub,
const unsigned char *k
);
extern void crypto_hash_asm(
unsigned char *out,
const unsigned char *in,
unsigned char inlen
);
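/*
 * Note: the assembly entry points take 8-bit length arguments, so the 64-bit
 * lengths are truncated to their low byte when forwarded below; this is
 * sufficient for the message and AD lengths this build is configured for.
 */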
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
)
{
/*
...
... the code for the cipher implementation goes here,
... generating a ciphertext c[0],c[1],...,c[*clen-1]
... from a plaintext m[0],m[1],...,m[mlen-1]
... and associated data ad[0],ad[1],...,ad[adlen-1]
... and nonce npub[0],npub[1],..
... and secret key k[0],k[1],...
... the implementation shall not use nsec
...
... return 0;
*/
(void)nsec;
crypto_aead_encrypt_asm(c, m, mlen, ad, adlen, npub, k);
*clen = mlen + TAG_INBYTES;
return 0;
}
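/*
 * Usage sketch (hypothetical caller, not part of this build): encrypt a buffer
 * and then decrypt it again, checking the tag result.
 *
 *   unsigned char c[MAX_CIPHER_LENGTH], m2[MAX_MESSAGE_LENGTH];
 *   unsigned long long clen, m2len;
 *   crypto_aead_encrypt(c, &clen, m, mlen, ad, adlen, NULL, npub, k);
 *   if (crypto_aead_decrypt(m2, &m2len, NULL, c, clen, ad, adlen, npub, k) != 0) {
 *       // tag mismatch: m2 has been cleared
 *   }
 */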
int crypto_aead_decrypt(
unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
)
{
/*
...
... the code for the AEAD implementation goes here,
... generating a plaintext m[0],m[1],...,m[*mlen-1]
... and secret message number nsec[0],nsec[1],...
... from a ciphertext c[0],c[1],...,c[clen-1]
... and associated data ad[0],ad[1],...,ad[adlen-1]
... and nonce number npub[0],npub[1],...
... and secret key k[0],k[1],...
...
... return 0;
*/
unsigned long long mlen_;
unsigned char tag_is_match;
(void)nsec;
if (clen < CRYPTO_ABYTES) {
return -1;
}
mlen_ = clen - CRYPTO_ABYTES;
tag_is_match = crypto_aead_decrypt_asm(m, c, mlen_, ad, adlen, npub, k);
if (tag_is_match != 0)
{
memset(m, 0, (size_t)mlen_);
return -1;
}
*mlen = mlen_;
return 0;
}
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
;
; ============================================
; S R A M D E F I N I T I O N S
; ============================================
;
#include <avr/io.h>
#include "config.h"
.section .noinit
SRAM_STATE: .BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
#if (STATE_INBYTES > 32)
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
#endif
#if (STATE_INBYTES > 48)
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
#endif
SRAM_MESSAGE_OUT_ADDR: .BYTE 0, 0
SRAM_MESSAGE_IN_ADDR: .BYTE 0, 0
SRAM_MESSAGE_IN_LEN: .BYTE 0, 0
#ifdef CRYPTO_AEAD
; For CRYPTO_AEAD
SRAM_ASSOCIATED_DATA_ADDR: .BYTE 0, 0
SRAM_ADLEN: .BYTE 0, 0
SRAM_NONCE_ADDR: .BYTE 0, 0
SRAM_KEY_ADDR: .BYTE 0, 0
SRAM_ADDITIONAL:
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
#if (CRYPTO_ABYTES > 16)
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
#endif
#if (CRYPTO_ABYTES > 24)
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
#endif
#endif
.section .text
#include "permutation.h"
; require YH:YL be the address of the current plaintext/ciphertext block
; for enc and dec, the resulting ciphertext/plaintext is stored via ZH:ZL
; require ZH:ZL be the address of the current output block
.macro XOR_to_State_ENCDEC
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
mov cnt0, rate
XOR_to_State_loop_ENCDEC:
ld tmp0, Y+ ; plaintext/ciphertext
ld tmp1, X ; state
eor tmp1, tmp0 ; ciphertext/plaintext
st Z+, tmp1 ; store ciphertext/plaintext
sbrc AEDH, 1 ; test auth/enc or dec; if AEDH[1] == 0, skip replacing the state byte
mov tmp1, tmp0 ; if dec, replace state
st X+, tmp1 ; store state byte
dec cnt0
brne XOR_to_State_loop_ENCDEC
; YH:YL are now the address of the next plaintext/ciphertext block
.endm
; require YH:YL be the address of the current input (associated data / hash message) block
.macro XOR_to_State_AUTH
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
mov cnt0, rate
XOR_to_State_loop_AUTH:
ld tmp0, Y+ ; input data byte
ld tmp1, X ; state byte
eor tmp1, tmp0 ; absorb input into state
st X+, tmp1 ; store state byte
dec cnt0
brne XOR_to_State_loop_AUTH
; YH:YL are now the address of the next associated data block
.endm
; require YH:YL to point to the input data
; require ZH:ZL to point to the output data
; require cnt0 to contain the number of bytes in the source data
; the number of bytes in the source data must be less than the rate, i.e., 0 <= cnt0 < rate
;
; the 0th bit in AEDH is used to distinguish (auth AD) or (enc/dec M/C):
; AEDH[0] = 0 for (auth AD), AEDH[0] = 1 for (enc/dec M/C)
; the 1st bit in AEDH is used to distinguish (auth AD/enc M) or (dec C):
; AEDH[1] = 0 for (auth AD/enc M), AEDH[1] = 1 for (dec C)
; AEDH = 0b000 for (auth AD)
; AEDH = 0b001 for (enc M)
; AEDH = 0b011 for (dec C)
Pad_XOR_to_State:
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
tst cnt0
breq XOR_padded_data
XOR_source_data_loop:
ld tmp0, Y+ ; plaintext/ciphertext
ld tmp1, X ; state
eor tmp1, tmp0 ; ciphertext/plaintext
sbrc AEDH, 0 ; test auth or enc/dec; if AEDH[0] == 0, skip storing the result
st Z+, tmp1 ; store ciphertext/plaintext
sbrc AEDH, 1 ; test auth/enc or dec; if AEDH[1] == 0, skip replacing the state byte
mov tmp1, tmp0 ; if dec, replace state
st X+, tmp1 ; store state byte
dec cnt0
brne XOR_source_data_loop
XOR_padded_data:
ldi tmp0, PAD_BITS
ld tmp1, X
eor tmp1, tmp0
st X, tmp1
ret
AddDomain:
ldi XH, hi8(SRAM_STATE + STATE_INBYTES - 1)
ldi XL, lo8(SRAM_STATE + STATE_INBYTES - 1)
ldi tmp0, DOMAIN_BITS
ld tmp1, X
eor tmp0, tmp1
st X, tmp0
ret
; require ZH:ZL be the address of the destination
EXTRACT_from_State:
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
mov tmp1, rate
EXTRACT_from_State_loop:
ld tmp0, X+
st Z+, tmp0
dec tmp1
brne EXTRACT_from_State_loop
ret
AUTH:
tst radlen
breq AUTH_end
cp radlen, rate
brlo auth_ad_padded_block
auth_ad_loop:
XOR_to_State_AUTH
rcall Permutation
sub radlen, rate
cp radlen, rate
brlo auth_ad_padded_block
rjmp auth_ad_loop
auth_ad_padded_block:
mov cnt0, radlen
rcall Pad_XOR_to_State
rcall Permutation
AUTH_end:
ret
#ifdef CRYPTO_AEAD
Initialization:
ldi rn, NR_0
ldi XL, lo8(SRAM_STATE)
ldi XH, hi8(SRAM_STATE)
lds YH, SRAM_NONCE_ADDR
lds YL, SRAM_NONCE_ADDR + 1
ldi cnt0, CRYPTO_NPUBBYTES
load_nonce_loop:
ld tmp0, Y+
st X+, tmp0
dec cnt0
brne load_nonce_loop
lds YH, SRAM_KEY_ADDR
lds YL, SRAM_KEY_ADDR + 1
ldi cnt0, CRYPTO_KEYBYTES
load_key_loop:
ld tmp0, Y+
st X+, tmp0
dec cnt0
brne load_key_loop
#if (STATE_INBITS==384) && (RATE_INBITS==192)
ldi cnt0, (STATE_INBYTES - CRYPTO_NPUBBYTES - CRYPTO_KEYBYTES - 1)
clr tmp0
empty_state_loop:
st X+, tmp0
dec cnt0
brne empty_state_loop
ldi tmp0, S384_R192_BITS
st X+, tmp0
#endif
rcall Permutation
ret
ENC:
tst mclen
breq ENC_end
cp mclen, rate
brlo enc_padded_block
enc_loop:
XOR_to_State_ENCDEC
ldi rn, NR_i
rcall Permutation
sub mclen, rate
cp mclen, rate
brlo enc_padded_block
rjmp enc_loop
enc_padded_block:
mov cnt0, mclen
rcall Pad_XOR_to_State
ENC_end:
ret
Finalization:
ldi rate, SQUEEZE_RATE_INBYTES
ldi rn, NR_f
rcall Permutation
rcall EXTRACT_from_State
ret
; void crypto_aead_encrypt_asm(
; unsigned char *c,
; const unsigned char *m,
; unsigned long long mlen,
; const unsigned char *ad,
; unsigned long long adlen,
; const unsigned char *npub,
; const unsigned char *k
; )
;
; unsigned char *c, is passed in r24:r25
; const unsigned char *m, is passed in r22:r23
; unsigned long long mlen, is passed in r20:r21, only LSB (r20) is used
; const unsigned char *ad, is passed in r18:r19
; unsigned long long adlen, is passed in r16:r17, only LSB (r16) is used
; const unsigned char *npub, is passed in r14:r15
; const unsigned char *k is passed in r12:r13
.global crypto_aead_encrypt_asm
crypto_aead_encrypt_asm:
PUSH_ALL
ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
st X+, r25 ;store cipher address in SRAM_MESSAGE_OUT_ADDR
st X+, r24
st X+, r23 ;store message address in SRAM_MESSAGE_IN_ADDR
st X+, r22
st X+, r21 ;store message length in SRAM_MESSAGE_IN_LEN
st X+, r20
st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR
st X+, r18
st X+, r17 ;store associated data length in SRAM_ADLEN
st X+, r16
st X+, r15 ;store nonce address in SRAM_NONCE_ADDR
st X+, r14
st X+, r13 ;store key address in SRAM_KEY_ADDR
st X+, r12
mov radlen, r16
mov mclen, r20
rcall Initialization
ldi rn, NR_i
ldi rate, RATE_INBYTES
ldi AEDH, 0b000 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
lds YH, SRAM_ASSOCIATED_DATA_ADDR
lds YL, SRAM_ASSOCIATED_DATA_ADDR + 1
rcall AUTH
rcall AddDomain
ldi AEDH, 0b001 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
lds YH, SRAM_MESSAGE_IN_ADDR
lds YL, SRAM_MESSAGE_IN_ADDR + 1
lds ZH, SRAM_MESSAGE_OUT_ADDR
lds ZL, SRAM_MESSAGE_OUT_ADDR + 1
rcall ENC
rcall Finalization
POP_ALL
ret
; int crypto_aead_decrypt_asm(
; unsigned char *m,
; const unsigned char *c,
; unsigned long long clen,
; const unsigned char *ad,
; unsigned long long adlen,
; const unsigned char *npub,
; const unsigned char *k
; )
;
; unsigned char *m, is passed in r24:r25
; const unsigned char *c, is passed in r22:r23
; unsigned long long clen, is passed in r20:r21, only LSB (r20) is used
; const unsigned char *ad, is passed in r18:r19
; unsigned long long adlen, is passed in r16:r17, only LSB (r16) is used
; const unsigned char *npub, is passed in r14:r15
; const unsigned char *k is passed in r12:r13
.global crypto_aead_decrypt_asm
crypto_aead_decrypt_asm:
PUSH_ALL
ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
st X+, r25 ;store message address in SRAM_MESSAGE_OUT_ADDR
st X+, r24
st X+, r23 ;store cipher address in SRAM_MESSAGE_IN_ADDR
st X+, r22
st X+, r21 ;store cipher length in SRAM_MESSAGE_IN_LEN
st X+, r20
st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR
st X+, r18
st X+, r17 ;store associated data length in SRAM_ADLEN
st X+, r16
st X+, r15 ;store nonce address in SRAM_NONCE_ADDR
st X+, r14
st X+, r13 ;store key address in SRAM_KEY_ADDR
st X+, r12
mov radlen, r16
mov mclen, r20
rcall Initialization
ldi rn, NR_i
ldi rate, RATE_INBYTES
ldi AEDH, 0b000 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
lds YH, SRAM_ASSOCIATED_DATA_ADDR
lds YL, SRAM_ASSOCIATED_DATA_ADDR + 1
rcall AUTH
rcall AddDomain
ldi AEDH, 0b011 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
lds YH, SRAM_MESSAGE_IN_ADDR
lds YL, SRAM_MESSAGE_IN_ADDR + 1
lds ZH, SRAM_MESSAGE_OUT_ADDR
lds ZL, SRAM_MESSAGE_OUT_ADDR + 1
rcall ENC
ldi ZH, hi8(SRAM_ADDITIONAL)
ldi ZL, lo8(SRAM_ADDITIONAL)
rcall Finalization
sbiw ZL, CRYPTO_ABYTES
ldi cnt0, CRYPTO_ABYTES
compare_tag:
ld tmp0, Z+
ld tmp1, Y+
cp tmp0, tmp1
brne return_tag_not_match
dec cnt0
brne compare_tag
rjmp return_tag_match
return_tag_not_match:
ldi r25, 0xFF
ldi r24, 0xFF
rjmp crypto_aead_decrypt_end
return_tag_match:
clr r25
clr r24
crypto_aead_decrypt_end:
POP_ALL
ret
; #ifdef CRYPTO_AEAD
#endif
#ifdef CRYPTO_HASH
; void crypto_hash_asm(
; unsigned char *out,
; const unsigned char *in,
; unsigned long long inlen
; )
;
; unsigned char *out, is passed in r24:r25
; const unsigned char *in, is passed in r22:r23
; unsigned long long inlen, is passed in r20:r21, only LSB (r20) is used
.global crypto_hash_asm
crypto_hash_asm:
PUSH_ALL
ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
st X+, r25 ;store digest output address in SRAM_MESSAGE_OUT_ADDR
st X+, r24
st X+, r23 ;store message address in SRAM_MESSAGE_IN_ADDR
st X+, r22
st X+, r21 ;store message length in SRAM_MESSAGE_IN_LEN
st X+, r20
mov mclen, r20
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
#if (STATE_INBITS==384) && (HASH_RATE_INBITS==128)
ldi cnt0, STATE_INBYTES - 1
#else
ldi cnt0, STATE_INBYTES
#endif
clr tmp0
zero_state:
st X+, tmp0
dec cnt0
brne zero_state
#if (STATE_INBITS==384) && (HASH_RATE_INBITS==128)
ldi tmp0, S384_R192_BITS
st X+, tmp0
#endif
ldi rn, NR_h
ldi AEDH, 0b100
HASH_ABSORBING:
mov radlen, mclen
tst radlen
breq EMPTY_M
ldi rate, HASH_RATE_INBYTES
lds YH, SRAM_MESSAGE_IN_ADDR
lds YL, SRAM_MESSAGE_IN_ADDR + 1
rcall AUTH
rjmp HASH_SQUEEZING
EMPTY_M:
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
ldi tmp0, PAD_BITS
ld tmp1, X
eor tmp1, tmp0
st X, tmp1
rcall Permutation
HASH_SQUEEZING:
ldi rate, HASH_SQUEEZE_RATE_INBYTES
lds ZH, SRAM_MESSAGE_OUT_ADDR
lds ZL, SRAM_MESSAGE_OUT_ADDR + 1
ldi tcnt, CRYPTO_BYTES
SQUEEZING_loop:
rcall EXTRACT_from_State
subi tcnt, HASH_SQUEEZE_RATE_INBYTES
breq HASH_SQUEEZING_end
rcall Permutation
rjmp SQUEEZING_loop
HASH_SQUEEZING_end:
POP_ALL
ret
#endif
; Byte Order In AVR 8:
; KNOT-AEAD(128, 256, 64):
; N[ 0] AEAD_State[ 0] | Message[ 0] Perm_row_0[0] 0 Tag[ 0]
; N[ 1] AEAD_State[ 1] | Message[ 1] Perm_row_0[1] 0 Tag[ 1]
; N[ 2] AEAD_State[ 2] | Message[ 2] Perm_row_0[2] 0 Tag[ 2]
; N[ 3] AEAD_State[ 3] | Message[ 3] Perm_row_0[3] 0 Tag[ 3]
; N[ 4] AEAD_State[ 4] | Message[ 4] 0x01 Perm_row_0[4] 0 Tag[ 4]
; N[ 5] AEAD_State[ 5] | Message[ 5] 0x00 Perm_row_0[5] 0 Tag[ 5]
; N[ 6] AEAD_State[ 6] | Message[ 6] 0x00 Perm_row_0[6] 0 Tag[ 6]
; N[ 7] AEAD_State[ 7] | Message[ 7] 0x00 Perm_row_0[7] <<< 0 Tag[ 7]
; N[ 8] AEAD_State[ 8] | Perm_row_1[0] 1
; N[ 9] AEAD_State[ 9] | Perm_row_1[1] 1
; N[10] AEAD_State[10] | Perm_row_1[2] 1
; N[11] AEAD_State[11] | Perm_row_1[3] 1
; N[12] AEAD_State[12] | Perm_row_1[4] 1
; N[13] AEAD_State[13] | Perm_row_1[5] 1
; N[14] AEAD_State[14] | Perm_row_1[6] 1
; N[15] AEAD_State[15] | Perm_row_1[7] <<< 1
; K[ 0] AEAD_State[16] | Perm_row_2[0] 8
; K[ 1] AEAD_State[17] | Perm_row_2[1] 8
; K[ 2] AEAD_State[18] | Perm_row_2[2] 8
; K[ 3] AEAD_State[19] | Perm_row_2[3] 8
; K[ 4] AEAD_State[20] | Perm_row_2[4] 8
; K[ 5] AEAD_State[21] | Perm_row_2[5] 8
; K[ 6] AEAD_State[22] | Perm_row_2[6] 8
; K[ 7] AEAD_State[23] | Perm_row_2[7] <<< 8
; K[ 8] AEAD_State[24] | Perm_row_3[0] 25
; K[ 9] AEAD_State[25] | Perm_row_3[1] 25
; K[10] AEAD_State[26] | Perm_row_3[2] 25
; K[11] AEAD_State[27] | Perm_row_3[3] 25
; K[12] AEAD_State[28] | Perm_row_3[4] 25
; K[13] AEAD_State[29] | Perm_row_3[5] 25
; K[14] AEAD_State[30] | Perm_row_3[6] 25
; K[15] AEAD_State[31] | ^0x80 Perm_row_3[7] <<< 25
;
;
; KNOT-AEAD(128, 384, 192):
; Initialization
; N[ 0] AEAD_State[ 0] | Message[ 0] Perm_row_0[ 0] 0 Tag[ 0]
; N[ 1] AEAD_State[ 1] | Message[ 1] Perm_row_0[ 1] 0 Tag[ 1]
; N[ 2] AEAD_State[ 2] | Message[ 2] Perm_row_0[ 2] 0 Tag[ 2]
; N[ 3] AEAD_State[ 3] | Message[ 3] Perm_row_0[ 3] 0 Tag[ 3]
; N[ 4] AEAD_State[ 4] | Message[ 4] 0x01 Perm_row_0[ 4] 0 Tag[ 4]
; N[ 5] AEAD_State[ 5] | Message[ 5] 0x00 Perm_row_0[ 5] 0 Tag[ 5]
; N[ 6] AEAD_State[ 6] | Message[ 6] 0x00 Perm_row_0[ 6] 0 Tag[ 6]
; N[ 7] AEAD_State[ 7] | Message[ 7] 0x00 Perm_row_0[ 7] 0 Tag[ 7]
; N[ 8] AEAD_State[ 8] | Message[ 8] 0x00 Perm_row_0[ 8] 0 Tag[ 8]
; N[ 9] AEAD_State[ 9] | Message[ 9] 0x00 Perm_row_0[ 9] 0 Tag[ 9]
; N[10] AEAD_State[10] | Message[10] 0x00 Perm_row_0[10] 0 Tag[10]
; N[11] AEAD_State[11] | Message[11] 0x00 Perm_row_0[11] <<< 0 Tag[11]
; N[12] AEAD_State[12] | Message[12] 0x00 Perm_row_1[ 0] 1 Tag[12]
; N[13] AEAD_State[13] | Message[13] 0x00 Perm_row_1[ 1] 1 Tag[13]
; N[14] AEAD_State[14] | Message[14] 0x00 Perm_row_1[ 2] 1 Tag[14]
; N[15] AEAD_State[15] | Message[15] 0x00 Perm_row_1[ 3] 1 Tag[15]
; K[ 0] AEAD_State[16] | Message[16] 0x00 Perm_row_1[ 4] 1
; K[ 1] AEAD_State[17] | Message[17] 0x00 Perm_row_1[ 5] 1
; K[ 2] AEAD_State[18] | Message[18] 0x00 Perm_row_1[ 6] 1
; K[ 3] AEAD_State[19] | Message[19] 0x00 Perm_row_1[ 7] 1
; K[ 4] AEAD_State[20] | Message[20] 0x00 Perm_row_1[ 8] 1
; K[ 5] AEAD_State[21] | Message[21] 0x00 Perm_row_1[ 9] 1
; K[ 6] AEAD_State[22] | Message[22] 0x00 Perm_row_1[10] 1
; K[ 7] AEAD_State[23] | Message[23] 0x00 Perm_row_1[11] <<< 1
; K[ 8] AEAD_State[24] | Perm_row_2[ 0] 8
; K[ 9] AEAD_State[25] | Perm_row_2[ 1] 8
; K[10] AEAD_State[26] | Perm_row_2[ 2] 8
; K[11] AEAD_State[27] | Perm_row_2[ 3] 8
; K[12] AEAD_State[28] | Perm_row_2[ 4] 8
; K[13] AEAD_State[29] | Perm_row_2[ 5] 8
; K[14] AEAD_State[30] | Perm_row_2[ 6] 8
; K[15] AEAD_State[31] | Perm_row_2[ 7] 8
; 0x00 AEAD_State[32] | Perm_row_2[ 8] 8
; 0x00 AEAD_State[33] | Perm_row_2[ 9] 8
; 0x00 AEAD_State[34] | Perm_row_2[10] 8
; 0x00 AEAD_State[35] | Perm_row_2[11] <<< 8
; 0x00 AEAD_State[36] | Perm_row_3[ 0] 55
; 0x00 AEAD_State[37] | Perm_row_3[ 1] 55
; 0x00 AEAD_State[38] | Perm_row_3[ 2] 55
; 0x00 AEAD_State[39] | Perm_row_3[ 3] 55
; 0x00 AEAD_State[40] | Perm_row_3[ 4] 55
; 0x00 AEAD_State[41] | Perm_row_3[ 5] 55
; 0x00 AEAD_State[42] | Perm_row_3[ 6] 55
; 0x00 AEAD_State[43] | Perm_row_3[ 7] 55
; 0x00 AEAD_State[44] | Perm_row_3[ 8] 55
; 0x00 AEAD_State[45] | Perm_row_3[ 9] 55
; 0x00 AEAD_State[46] | Perm_row_3[10] 55
; 0x00 ^0x80 AEAD_State[47] | ^0x80 Perm_row_3[11] <<< 55
#include <avr/io.h>
#include <avr/sfr_defs.h>
#include <stdlib.h>
#include <string.h>
#include "api.h"
#include "crypto_hash.h"
extern void crypto_hash_asm(
unsigned char *out,
const unsigned char *in,
unsigned char inlen
);
int crypto_hash(
unsigned char *out,
const unsigned char *in,
unsigned long long inlen
)
{
/*
...
... the code for the hash function implementation goes here
... generating a hash value out[0],out[1],...,out[CRYPTO_BYTES-1]
... from a message in[0],in[1],...,in[inlen-1]
...
... return 0;
*/
crypto_hash_asm(out, in, inlen);
return 0;
}
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
#define x10 r0
#define x11 r1
#define x12 r2
#define x13 r3
#define x14 r4
#define x15 r5
#define x16 r6
#define x17 r7
; an intentional arrangement of registers to facilitate movw
#define x20 r8
#define x21 r10
#define x22 r12
#define x23 r14
#define x24 r9
#define x25 r11
#define x26 r13
#define x27 r15
; an intentional arrangement of registers to facilitate movw
#define x30 r16
#define x35 r18
#define x32 r20
#define x37 r22
#define x34 r17
#define x31 r19
#define x36 r21
#define x33 r23
#define t0j r24
#define t1j r25
#define x0j r27
#include "assist.h"
.macro Sbox i0, i1, i2, i3
mov t0j, \i1
com \i0
and \i1, \i0
eor \i1, \i2
or \i2, t0j
eor \i0, \i3
eor \i2, \i0
eor t0j, \i3
and \i0, \i1
eor \i3, \i1
eor \i0, t0j
and t0j, \i2
eor \i1, t0j
.endm
Permutation:
PUSH_CONFLICT
mov rcnt, rn
ldi YH, hi8(SRAM_STATE + ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + ROW_INBYTES)
ld x10, Y+
ld x11, Y+
ld x12, Y+
ld x13, Y+
ld x14, Y+
ld x15, Y+
ld x16, Y+
ld x17, Y+
ld x20, Y+
ld x21, Y+
ld x22, Y+
ld x23, Y+
ld x24, Y+
ld x25, Y+
ld x26, Y+
ld x27, Y+
ld x30, Y+
ld x31, Y+
ld x32, Y+
ld x33, Y+
ld x34, Y+
ld x35, Y+
ld x36, Y+
ld x37, Y+
#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[2] = 1 for HASH
rjmp For_Hash
For_AEAD:
ldi ZL, lo8(RC_LFSR6)
ldi ZH, hi8(RC_LFSR6)
rjmp round_loop_start
For_Hash:
ldi ZL, lo8(RC_LFSR7)
ldi ZH, hi8(RC_LFSR7)
#elif defined(CRYPTO_AEAD)
ldi ZL, lo8(RC_LFSR6)
ldi ZH, hi8(RC_LFSR6)
#else
ldi ZL, lo8(RC_LFSR7)
ldi ZH, hi8(RC_LFSR7)
#endif
round_loop_start:
; AddRC
lpm t0j, Z+
ldi YH, hi8(SRAM_STATE)
ldi YL, lo8(SRAM_STATE)
ld x0j, Y
eor x0j, t0j
; SubColumns
Sbox x0j, x10, x20, x30
st Y+, x0j
ld x0j, Y
Sbox x0j, x11, x21, x31
st Y+, x0j
ld x0j, Y
Sbox x0j, x12, x22, x32
st Y+, x0j
ld x0j, Y
Sbox x0j, x13, x23, x33
st Y+, x0j
ld x0j, Y
Sbox x0j, x14, x24, x34
st Y+, x0j
ld x0j, Y
Sbox x0j, x15, x25, x35
st Y+, x0j
ld x0j, Y
Sbox x0j, x16, x26, x36
st Y+, x0j
ld x0j, Y
Sbox x0j, x17, x27, x37
st Y, x0j
; ShiftRows
; <<< 1
mov t0j, x17
rol t0j
rol x10
rol x11
rol x12
rol x13
rol x14
rol x15
rol x16
rol x17
; <<< 8
; 7 6 5 4 3 2 1 0 => 6 5 4 3 2 1 0 7
;mov t0j, x27
;mov x27, x26
;mov x26, x25
;mov x25, x24
;mov x24, x23
;mov x23, x22
;mov x22, x21
;mov x21, x20
;mov x20, t0j
; registers are intentionally arranged to facilitate movw
movw t0j, x23 ; t1j:t0j <= x27:x23
movw x23, x22 ; x27:x23 <= x26:x22
movw x22, x21 ; x26:x22 <= x25:x21
movw x21, x20 ; x25:x21 <= x24:x20
mov x20, t1j ; x20 <= x27
mov x24, t0j ; x24 <= x23
; <<< 1
mov t0j, x37
rol t0j
rol x30
rol x31
rol x32
rol x33
rol x34
rol x35
rol x36
rol x37
; <<< 24
; 7 6 5 4 3 2 1 0 => 4 3 2 1 0 7 6 5
;mov t0j, x30
;mov x30, x35
;mov x35, x32
;mov x32, x37
;mov x37, x34
;mov x34, x31
;mov x31, x36
;mov x36, x33
;mov x33, t0j
; registers are intentionally arranged to facilitate movw
;x30 r16
;x35 r18
;x32 r20
;x37 r22
;x34 r17
;x31 r19
;x36 r21
;x33 r23
movw t0j, x30 ; t1j:t0j <= x34:x30
movw x30, x35 ; x34:x30 <= x31:x35
movw x35, x32 ; x31:x35 <= x36:x32
movw x32, x37 ; x36:x32 <= x33:x37
mov x37, t1j ; x37 <= x34
mov x33, t0j ; x33 <= x30
dec rcnt
breq round_loop_end
jmp round_loop_start
round_loop_end:
ldi YH, hi8(SRAM_STATE + ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + ROW_INBYTES)
st Y+, x10
st Y+, x11
st Y+, x12
st Y+, x13
st Y+, x14
st Y+, x15
st Y+, x16
st Y+, x17
st Y+, x20
st Y+, x21
st Y+, x22
st Y+, x23
st Y+, x24
st Y+, x25
st Y+, x26
st Y+, x27
st Y+, x30
st Y+, x31
st Y+, x32
st Y+, x33
st Y+, x34
st Y+, x35
st Y+, x36
st Y+, x37
POP_CONFLICT
ret
.section .text
#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
RC_LFSR6:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x21, 0x03, 0x06
.byte 0x0c, 0x18, 0x31, 0x22, 0x05, 0x0a, 0x14, 0x29
.byte 0x13, 0x27, 0x0f, 0x1e, 0x3d, 0x3a, 0x34, 0x28
.byte 0x11, 0x23, 0x07, 0x0e, 0x1c, 0x39, 0x32, 0x24
.byte 0x09, 0x12, 0x25, 0x0b, 0x16, 0x2d, 0x1b, 0x37
.byte 0x2e, 0x1d, 0x3b, 0x36, 0x2c, 0x19, 0x33, 0x26
.byte 0x0d, 0x1a, 0x35, 0x2a, 0x15, 0x2b, 0x17, 0x2f
.byte 0x1f, 0x3f, 0x3e, 0x3c, 0x38, 0x30, 0x20, 0x00
RC_LFSR7:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03
.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a
.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c
.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b
.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a
.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f
.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43
.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09
.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36
.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37
.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31
.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25
.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c
.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a
.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f
.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00
#elif defined(CRYPTO_AEAD)
RC_LFSR6:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x21, 0x03, 0x06
.byte 0x0c, 0x18, 0x31, 0x22, 0x05, 0x0a, 0x14, 0x29
.byte 0x13, 0x27, 0x0f, 0x1e, 0x3d, 0x3a, 0x34, 0x28
.byte 0x11, 0x23, 0x07, 0x0e, 0x1c, 0x39, 0x32, 0x24
.byte 0x09, 0x12, 0x25, 0x0b, 0x16, 0x2d, 0x1b, 0x37
.byte 0x2e, 0x1d, 0x3b, 0x36, 0x2c, 0x19, 0x33, 0x26
.byte 0x0d, 0x1a, 0x35, 0x2a, 0x15, 0x2b, 0x17, 0x2f
.byte 0x1f, 0x3f, 0x3e, 0x3c, 0x38, 0x30, 0x20, 0x00
#else
RC_LFSR7:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03
.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a
.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c
.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b
.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a
.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f
.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43
.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09
.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36
.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37
.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31
.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25
.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c
.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a
.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f
.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00
#endif
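;
; Note (documentation only, not part of the build): RC_LFSR6/RC_LFSR7 above
; appear to be the successive states of maximal-length LFSRs of period 63 and
; 127; the trailing 0x00 entry is padding. A C sketch that regenerates
; RC_LFSR6 (RC_LFSR7 is analogous with mask 0x7F and feedback (x>>6)^(x>>5)):
;   unsigned char x = 0x01;
;   for (int i = 0; i < 63; i++) {
;       printf("0x%02x, ", x);
;       x = ((x << 1) & 0x3F) | (((x >> 5) ^ (x >> 4)) & 1);
;   }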
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
; registers are intentionally arranged to facilitate movw
#define x20 r0
#define x21 r2
#define x22 r4
#define x23 r6
#define x24 r8
#define x25 r10
#define x26 r1
#define x27 r3
#define x28 r5
#define x29 r7
#define x2a r9
#define x2b r11
; registers are intentionally arranged to facilitate movw
#define x30 r22
#define x35 r20
#define x3a r18
#define x33 r16
#define x38 r14
#define x31 r12
#define x36 r23
#define x3b r21
#define x34 r19
#define x39 r17
#define x32 r15
#define x37 r13
#define t0j r24
#define t1j r25
#define x0j r25
#define x1j r27
#include "assist.h"
.macro Sbox i0, i1, i2, i3
ldi t0j, 0xFF
eor \i0, t0j
mov t0j, \i1
and \i1, \i0
eor \i1, \i2
or \i2, t0j
eor \i0, \i3
eor \i2, \i0
eor t0j, \i3
and \i0, \i1
eor \i3, \i1
eor \i0, t0j
and t0j, \i2
eor \i1, t0j
.endm
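;
; Note: \i0 is complemented with EOR against 0xFF instead of COM; COM would
; set the carry flag, and the carry is used to chain the Row-1 <<< 1 rotation
; (lsl/rol x1j) across consecutive columns, being folded back into Row 1,
; byte 0 only after the last column.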
.macro OneColumn i0, i1, i2, i3
ld \i0, Y
ldd \i1, Y + ROW_INBYTES
Sbox \i0, \i1, \i2, \i3
st Y+, \i0
rol \i1 ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES -1, \i1
.endm
Permutation:
PUSH_CONFLICT
mov rcnt, rn
ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES)
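; rows 2-3 are kept in registers; rows 0-1 stay in SRAM and are processed
; one byte per column through x0j/x1j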
ld x20, Y+
ld x21, Y+
ld x22, Y+
ld x23, Y+
ld x24, Y+
ld x25, Y+
ld x26, Y+
ld x27, Y+
ld x28, Y+
ld x29, Y+
ld x2a, Y+
ld x2b, Y+
ld x30, Y+
ld x31, Y+
ld x32, Y+
ld x33, Y+
ld x34, Y+
ld x35, Y+
ld x36, Y+
ld x37, Y+
ld x38, Y+
ld x39, Y+
ld x3a, Y+
ld x3b, Y+
ldi ZL, lo8(RC_LFSR7)
ldi ZH, hi8(RC_LFSR7)
round_loop_start:
; AddRC
lpm t0j, Z+
ldi YH, hi8(SRAM_STATE)
ldi YL, lo8(SRAM_STATE)
ld x0j, Y
eor x0j, t0j
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, x20, x30
st Y+, x0j
lsl x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES -1, x1j
OneColumn x0j, x1j, x21, x31
OneColumn x0j, x1j, x22, x32
OneColumn x0j, x1j, x23, x33
OneColumn x0j, x1j, x24, x34
OneColumn x0j, x1j, x25, x35
OneColumn x0j, x1j, x26, x36
OneColumn x0j, x1j, x27, x37
OneColumn x0j, x1j, x28, x38
OneColumn x0j, x1j, x29, x39
OneColumn x0j, x1j, x2a, x3a
OneColumn x0j, x1j, x2b, x3b
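; finish ShiftRows Row 1 <<< 1: add the bit carried out of the last rol into
; bit 0 of Row 1, byte 0 (eor clears t0j without touching the carry flag)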
ld x1j, Y
eor t0j, t0j
adc x1j, t0j
st Y, x1j
; b a 9 8 7 6 5 4 3 2 1 0
; -- -- -- -- -- -- -- -- -- -- -- x- 0
; -- -- -- -- -- -- -- -- -- -- -- x' 0
; -- -- -- -- -- -- -- -- -- -- x- -- 1
; -- -- -- -- x' -- -- -- -- -- -- -- 7
; 4 3 2 1 0 b a 9 8 7 6 5
; ShiftRows -- the last two rows
; <<< 8
; b a 9 8 7 6 5 4 3 2 1 0 => a 9 8 7 6 5 4 3 2 1 0 b
movw t0j, x25 ; t1j:t0j <= x2b:x25
movw x25, x24 ; x2b:x25 <= x2a:x24
movw x24, x23 ; x2a:x24 <= x29:x23
movw x23, x22 ; x29:x23 <= x28:x22
movw x22, x21 ; x28:x22 <= x27:x21
movw x21, x20 ; x27:x21 <= x26:x20
mov x26, t0j ; x26 <= x25
mov x20, t1j ; x20 <= x2b
; >>> 1
mov t0j, x3b
ror t0j
ror x3a
ror x39
ror x38
ror x37
ror x36
ror x35
ror x34
ror x33
ror x32
ror x31
ror x30
ror x3b
; <<< 56
; b a 9 8 7 6 5 4 3 2 1 0 => 4 3 2 1 0 b a 9 8 7 6 5
; mov x3j, x30
; mov x30, x35
; mov x35, x3a
; mov x3a, x33
; mov x33, x38
; mov x38, x31
; mov x31, x36
; mov x36, x3b
; mov x3b, x34
; mov x34, x39
; mov x39, x32
; mov x32, x37
; mov x37, x3j
; registers are intentionally arranged to facilitate movw
; x30 r22
; x35 r20
; x3a r18
; x33 r16
; x38 r14
; x31 r12
; x36 r23
; x3b r21
; x34 r19
; x39 r17
; x32 r15
; x37 r13
movw t0j, x30 ; t1j:t0j <= x36:x30
movw x30, x35 ; x36:x30 <= x3b:x35
movw x35, x3a ; x3b:x35 <= x34:x3a
movw x3a, x33 ; x34:x3a <= x39:x33
movw x33, x38 ; x39:x33 <= x32:x38
movw x38, x31 ; x32:x38 <= x37:x31
mov x31, t1j ; x31 <= x36
mov x37, t0j ; x37 <= x30
dec rcnt
breq round_loop_end
jmp round_loop_start
round_loop_end:
ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES)
st Y+, x20
st Y+, x21
st Y+, x22
st Y+, x23
st Y+, x24
st Y+, x25
st Y+, x26
st Y+, x27
st Y+, x28
st Y+, x29
st Y+, x2a
st Y+, x2b
st Y+, x30
st Y+, x31
st Y+, x32
st Y+, x33
st Y+, x34
st Y+, x35
st Y+, x36
st Y+, x37
st Y+, x38
st Y+, x39
st Y+, x3a
st Y+, x3b
POP_CONFLICT
ret
RC_LFSR7:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03
.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a
.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c
.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b
.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a
.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f
.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43
.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09
.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36
.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37
.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31
.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25
.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c
.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a
.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f
.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
#define x20 r0
#define x22 r2
#define x24 r4
#define x26 r6
#define x28 r1
#define x2a r3
#define x2c r5
#define x2e r7
#define x30 r8
#define x3d r10
#define x3a r12
#define x37 r14
#define x34 r16
#define x31 r18
#define x3e r20
#define x3b r22
#define x38 r9
#define x35 r11
#define x32 r13
#define x3f r15
#define x3c r17
#define x39 r19
#define x36 r21
#define x33 r23
#define t0j r24
#define t1j r25
#define x0j r25
#define x1j r27
#define x2j r26
#include "assist.h"
.macro Sbox i0, i1, i2, i3
ldi t0j, 0xFF
eor \i0, t0j
mov t0j, \i1
and \i1, \i0
eor \i1, \i2
or \i2, t0j
eor \i0, \i3
eor \i2, \i0
eor t0j, \i3
and \i0, \i1
eor \i3, \i1
eor \i0, t0j
and t0j, \i2
eor \i1, t0j
.endm
.macro TwoColumns i2_e, i3_e, i3_o
; column 2i
ld x0j, Y
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, \i2_e, \i3_e
st Y+, x0j
rol x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES - 1, x1j
; column 2i+1
ld x0j, Y
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, x2j, \i3_o
st Y+, x0j
rol x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES - 1, x1j
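; Row 2 <<< 16 for the odd-indexed bytes: park the updated byte held in x2j
; two columns ahead and pick up the byte that was there for the next odd column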
ldd t0j, Y + 2 * ROW_INBYTES + 1
std Y + 2 * ROW_INBYTES + 1, x2j
mov x2j, t0j
.endm
Permutation:
PUSH_CONFLICT
mov rcnt, rn
push rcnt ; r26 is reused as x2j inside the round loop, so the round counter is kept on the stack
ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES)
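; rows 0 and 1 stay in SRAM and are processed through x0j/x1j; of Row 2 only
; the even-indexed bytes are kept in registers (the odd bytes are rotated in
; place via x2j); Row 3 is kept entirely in registers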
ldd x20, Y + 0x00
ldd x22, Y + 0x02
ldd x24, Y + 0x04
ldd x26, Y + 0x06
ldd x28, Y + 0x08
ldd x2a, Y + 0x0a
ldd x2c, Y + 0x0c
ldd x2e, Y + 0x0e
adiw YL, ROW_INBYTES
ld x30, Y+
ld x31, Y+
ld x32, Y+
ld x33, Y+
ld x34, Y+
ld x35, Y+
ld x36, Y+
ld x37, Y+
ld x38, Y+
ld x39, Y+
ld x3a, Y+
ld x3b, Y+
ld x3c, Y+
ld x3d, Y+
ld x3e, Y+
ld x3f, Y+
#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[2] = 1 for HASH
rjmp For_Hash
For_AEAD:
ldi ZL, lo8(RC_LFSR7)
ldi ZH, hi8(RC_LFSR7)
rjmp round_loop_start
For_Hash:
ldi ZL, lo8(RC_LFSR8)
ldi ZH, hi8(RC_LFSR8)
#elif defined(CRYPTO_AEAD)
ldi ZL, lo8(RC_LFSR7)
ldi ZH, hi8(RC_LFSR7)
#else
ldi ZL, lo8(RC_LFSR8)
ldi ZH, hi8(RC_LFSR8)
#endif
round_loop_start:
; AddRC
lpm t0j, Z+
ldi YH, hi8(SRAM_STATE)
ldi YL, lo8(SRAM_STATE)
; column 0
ld x0j, Y
eor x0j, t0j
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, x20, x30
st Y+, x0j
lsl x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES - 1, x1j
; column 1
ld x0j, Y
ldd x1j, Y + ROW_INBYTES
ldd x2j, Y + 2 * ROW_INBYTES
Sbox x0j, x1j, x2j, x31
st Y+, x0j
rol x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES - 1, x1j
ldd t0j, Y + 2 * ROW_INBYTES + 1
std Y + 2 * ROW_INBYTES + 1, x2j
mov x2j, t0j
; column 2, 3
TwoColumns x22, x32, x33
; column 4, 5
TwoColumns x24, x34, x35
; column 6, 7
TwoColumns x26, x36, x37
; column 8, 9
TwoColumns x28, x38, x39
; column 10, 11
TwoColumns x2a, x3a, x3b
; column 12, 13
TwoColumns x2c, x3c, x3d
; column 14
ld x0j, Y
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, x2e, x3e
st Y+, x0j
rol x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES - 1, x1j
; column 15
ld x0j, Y
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, x2j, x3f
st Y+, x0j
rol x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES - 1, x1j
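; finish ShiftRows: fold the Row-1 carry into bit 0 of Row 1, byte 0, and store
; the last odd Row-2 byte (still in x2j) at its <<< 16 position (Row 2, byte 1)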
ld x1j, Y
eor t0j, t0j
adc x1j, t0j
st Y, x1j
std Y + ROW_INBYTES + 1, x2j
; f e d c b a 9 8 7 6 5 4 3 2 1 0
; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x- 0
; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x' 0
; -- -- -- -- -- -- -- -- -- -- -- -- -- x- -- -- 2
; -- -- -- -- -- -- -- -- -- -- -- -- x' -- -- -- 3
; c b a 9 8 7 6 5 4 3 2 1 0 f e d
; x2e x2c x2a x28 x26 x24 x22 x20 => x2c x2a x28 x26 x24 x22 x20 x2e
;mov t0j, x2e
;mov x2e, x2c
;mov x2c, x2a
;mov x2a, x28
;mov x28, x26
;mov x26, x24
;mov x24, x22
;mov x22, x20
;mov x20, t0j
; registers are intentionally arranged to facilitate movw
movw t0j, x26 ; t1j:t0j <= x2e:x26
movw x26, x24 ; x2e:x26 <= x2c:x24
movw x24, x22 ; x2c:x24 <= x2a:x22
movw x22, x20 ; x2a:x22 <= x28:x20
mov x20, t1j ; x20 <= x2e
mov x28, t0j ; x28 <= x26
; <<< 1
mov t0j, x3f
rol t0j
rol x30
rol x31
rol x32
rol x33
rol x34
rol x35
rol x36
rol x37
rol x38
rol x39
rol x3a
rol x3b
rol x3c
rol x3d
rol x3e
rol x3f
; <<< 24
; f e d c b a 9 8 7 6 5 4 3 2 1 0 =>
; c b a 9 8 7 6 5 4 3 2 1 0 f e d
; mov x3j, x30
; mov x30, x3d
; mov x3d, x3a
; mov x3a, x37
; mov x37, x34
; mov x34, x31
; mov x31, x3e
; mov x3e, x3b
; mov x3b, x38
; mov x38, x35
; mov x35, x32
; mov x32, x3f
; mov x3f, x3c
; mov x3c, x39
; mov x39, x36
; mov x36, x33
; mov x33, x3j
; registers are intentionally arranged to facilitate movw
; x30 r8
; x3d r10
; x3a r12
; x37 r14
; x34 r16
; x31 r18
; x3e r20
; x3b r22
; x38 r9
; x35 r11
; x32 r13
; x3f r15
; x3c r17
; x39 r19
; x36 r21
; x33 r23
movw t0j, x30 ; t1j:t0j <= x38:x30
movw x30, x3d ; x38:x30 <= x35:x3d
movw x3d, x3a ; x35:x3d <= x32:x3a
movw x3a, x37 ; x32:x3a <= x3f:x37
movw x37, x34 ; x3f:x37 <= x3c:x34
movw x34, x31 ; x3c:x34 <= x39:x31
movw x31, x3e ; x39:x31 <= x36:x3e
movw x3e, x3b ; x36:x3e <= x33:x3b
mov x3b, t1j ; x3b <= x38
mov x33, t0j ; x33 <= x30
pop rcnt
dec rcnt
push rcnt
breq round_loop_end
rjmp round_loop_start
round_loop_end:
pop rcnt
ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES)
std Y + 0x00, x20
std Y + 0x02, x22
std Y + 0x04, x24
std Y + 0x06, x26
std Y + 0x08, x28
std Y + 0x0a, x2a
std Y + 0x0c, x2c
std Y + 0x0e, x2e
adiw YL, ROW_INBYTES
st Y+, x30
st Y+, x31
st Y+, x32
st Y+, x33
st Y+, x34
st Y+, x35
st Y+, x36
st Y+, x37
st Y+, x38
st Y+, x39
st Y+, x3a
st Y+, x3b
st Y+, x3c
st Y+, x3d
st Y+, x3e
st Y+, x3f
POP_CONFLICT
ret
.section .text
#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
RC_LFSR7:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03
.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a
.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c
.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b
.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a
.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f
.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43
.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09
.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36
.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37
.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31
.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25
.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c
.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a
.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f
.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00
RC_LFSR8:
.byte 0x01, 0x02, 0x04, 0x08, 0x11, 0x23, 0x47, 0x8e
.byte 0x1c, 0x38, 0x71, 0xe2, 0xc4, 0x89, 0x12, 0x25
.byte 0x4b, 0x97, 0x2e, 0x5c, 0xb8, 0x70, 0xe0, 0xc0
.byte 0x81, 0x03, 0x06, 0x0c, 0x19, 0x32, 0x64, 0xc9
.byte 0x92, 0x24, 0x49, 0x93, 0x26, 0x4d, 0x9b, 0x37
.byte 0x6e, 0xdc, 0xb9, 0x72, 0xe4, 0xc8, 0x90, 0x20
.byte 0x41, 0x82, 0x05, 0x0a, 0x15, 0x2b, 0x56, 0xad
.byte 0x5b, 0xb6, 0x6d, 0xda, 0xb5, 0x6b, 0xd6, 0xac
.byte 0x59, 0xb2, 0x65, 0xcb, 0x96, 0x2c, 0x58, 0xb0
.byte 0x61, 0xc3, 0x87, 0x0f, 0x1f, 0x3e, 0x7d, 0xfb
.byte 0xf6, 0xed, 0xdb, 0xb7, 0x6f, 0xde, 0xbd, 0x7a
.byte 0xf5, 0xeb, 0xd7, 0xae, 0x5d, 0xba, 0x74, 0xe8
.byte 0xd1, 0xa2, 0x44, 0x88, 0x10, 0x21, 0x43, 0x86
.byte 0x0d, 0x1b, 0x36, 0x6c, 0xd8, 0xb1, 0x63, 0xc7
.byte 0x8f, 0x1e, 0x3c, 0x79, 0xf3, 0xe7, 0xce, 0x9c
.byte 0x39, 0x73, 0xe6, 0xcc, 0x98, 0x31, 0x62, 0xc5
.byte 0x8b, 0x16, 0x2d, 0x5a, 0xb4, 0x69, 0xd2, 0xa4
.byte 0x48, 0x91, 0x22, 0x45, 0x8a, 0x14, 0x29, 0x52
.byte 0xa5, 0x4a, 0x95, 0x2a, 0x54, 0xa9, 0x53, 0xa7
.byte 0x4e, 0x9d, 0x3b, 0x77, 0xee, 0xdd, 0xbb, 0x76
.byte 0xec, 0xd9, 0xb3, 0x67, 0xcf, 0x9e, 0x3d, 0x7b
.byte 0xf7, 0xef, 0xdf, 0xbf, 0x7e, 0xfd, 0xfa, 0xf4
.byte 0xe9, 0xd3, 0xa6, 0x4c, 0x99, 0x33, 0x66, 0xcd
.byte 0x9a, 0x35, 0x6a, 0xd4, 0xa8, 0x51, 0xa3, 0x46
.byte 0x8c, 0x18, 0x30, 0x60, 0xc1, 0x83, 0x07, 0x0e
.byte 0x1d, 0x3a, 0x75, 0xea, 0xd5, 0xaa, 0x55, 0xab
.byte 0x57, 0xaf, 0x5f, 0xbe, 0x7c, 0xf9, 0xf2, 0xe5
.byte 0xca, 0x94, 0x28, 0x50, 0xa1, 0x42, 0x84, 0x09
.byte 0x13, 0x27, 0x4f, 0x9f, 0x3f, 0x7f, 0xff, 0xfe
.byte 0xfc, 0xf8, 0xf0, 0xe1, 0xc2, 0x85, 0x0b, 0x17
.byte 0x2f, 0x5e, 0xbc, 0x78, 0xf1, 0xe3, 0xc6, 0x8d
.byte 0x1a, 0x34, 0x68, 0xd0, 0xa0, 0x40, 0x80, 0x00
#elif defined(CRYPTO_AEAD)
RC_LFSR7:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03
.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a
.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c
.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b
.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a
.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f
.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43
.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09
.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36
.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37
.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31
.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25
.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c
.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a
.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f
.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00
#else
RC_LFSR8:
.byte 0x01, 0x02, 0x04, 0x08, 0x11, 0x23, 0x47, 0x8e
.byte 0x1c, 0x38, 0x71, 0xe2, 0xc4, 0x89, 0x12, 0x25
.byte 0x4b, 0x97, 0x2e, 0x5c, 0xb8, 0x70, 0xe0, 0xc0
.byte 0x81, 0x03, 0x06, 0x0c, 0x19, 0x32, 0x64, 0xc9
.byte 0x92, 0x24, 0x49, 0x93, 0x26, 0x4d, 0x9b, 0x37
.byte 0x6e, 0xdc, 0xb9, 0x72, 0xe4, 0xc8, 0x90, 0x20
.byte 0x41, 0x82, 0x05, 0x0a, 0x15, 0x2b, 0x56, 0xad
.byte 0x5b, 0xb6, 0x6d, 0xda, 0xb5, 0x6b, 0xd6, 0xac
.byte 0x59, 0xb2, 0x65, 0xcb, 0x96, 0x2c, 0x58, 0xb0
.byte 0x61, 0xc3, 0x87, 0x0f, 0x1f, 0x3e, 0x7d, 0xfb
.byte 0xf6, 0xed, 0xdb, 0xb7, 0x6f, 0xde, 0xbd, 0x7a
.byte 0xf5, 0xeb, 0xd7, 0xae, 0x5d, 0xba, 0x74, 0xe8
.byte 0xd1, 0xa2, 0x44, 0x88, 0x10, 0x21, 0x43, 0x86
.byte 0x0d, 0x1b, 0x36, 0x6c, 0xd8, 0xb1, 0x63, 0xc7
.byte 0x8f, 0x1e, 0x3c, 0x79, 0xf3, 0xe7, 0xce, 0x9c
.byte 0x39, 0x73, 0xe6, 0xcc, 0x98, 0x31, 0x62, 0xc5
.byte 0x8b, 0x16, 0x2d, 0x5a, 0xb4, 0x69, 0xd2, 0xa4
.byte 0x48, 0x91, 0x22, 0x45, 0x8a, 0x14, 0x29, 0x52
.byte 0xa5, 0x4a, 0x95, 0x2a, 0x54, 0xa9, 0x53, 0xa7
.byte 0x4e, 0x9d, 0x3b, 0x77, 0xee, 0xdd, 0xbb, 0x76
.byte 0xec, 0xd9, 0xb3, 0x67, 0xcf, 0x9e, 0x3d, 0x7b
.byte 0xf7, 0xef, 0xdf, 0xbf, 0x7e, 0xfd, 0xfa, 0xf4
.byte 0xe9, 0xd3, 0xa6, 0x4c, 0x99, 0x33, 0x66, 0xcd
.byte 0x9a, 0x35, 0x6a, 0xd4, 0xa8, 0x51, 0xa3, 0x46
.byte 0x8c, 0x18, 0x30, 0x60, 0xc1, 0x83, 0x07, 0x0e
.byte 0x1d, 0x3a, 0x75, 0xea, 0xd5, 0xaa, 0x55, 0xab
.byte 0x57, 0xaf, 0x5f, 0xbe, 0x7c, 0xf9, 0xf2, 0xe5
.byte 0xca, 0x94, 0x28, 0x50, 0xa1, 0x42, 0x84, 0x09
.byte 0x13, 0x27, 0x4f, 0x9f, 0x3f, 0x7f, 0xff, 0xfe
.byte 0xfc, 0xf8, 0xf0, 0xe1, 0xc2, 0x85, 0x0b, 0x17
.byte 0x2f, 0x5e, 0xbc, 0x78, 0xf1, 0xe3, 0xc6, 0x8d
.byte 0x1a, 0x34, 0x68, 0xd0, 0xa0, 0x40, 0x80, 0x00
#endif
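;
; (Documentation only: RC_LFSR8 above appears to follow the same construction
;  with an 8-bit LFSR of period 255, starting from 0x01 with
;  x = ((x << 1) & 0xFF) | (((x>>7) ^ (x>>5) ^ (x>>4) ^ (x>>3)) & 1);
;  the final 0x00 entry is padding.)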
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
;
; ============================================
; R E G I S T E R D E F I N I T I O N S
; ============================================
;
#define mclen r16
#define radlen r17
#define tcnt r17
#define tmp0 r20
#define tmp1 r21
#define cnt0 r22
#define rn r23
#define rate r24
; AEDH = 0b000: for authenticate AD
; AEDH = 0b001: for encryption
; AEDH = 0b011: for decryption
; AEDH = 0b100: for hash
#define AEDH r25
#define rcnt r26
#if (STATE_INBITS==256)
#include "knot256.h"
#elif (STATE_INBITS==384)
#include "knot384.h"
#elif (STATE_INBITS==512)
#include "knot512.h"
#else
#error "Not specified key size and state size"
#endif