diff --git a/knot/Implementations/crypto_aead/knot128v1/avr8_speed/api.h b/knot/Implementations/crypto_aead/knot128v1/avr8_speed/api.h new file mode 100644 index 0000000..51fc844 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v1/avr8_speed/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 \ No newline at end of file diff --git a/knot/Implementations/crypto_aead/knot128v1/avr8_speed/assist.h b/knot/Implementations/crypto_aead/knot128v1/avr8_speed/assist.h new file mode 100644 index 0000000..f95a717 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v1/avr8_speed/assist.h @@ -0,0 +1,86 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.1 2020 by KNOT Team * +; ********************************************** +; +.macro PUSH_CONFLICT + push r16 + push r17 + push r18 + push r19 + + push r23 + push r24 + push r25 + push r26 + push r27 + push r28 + push r29 + push r30 + push r31 +.endm + +.macro POP_CONFLICT + pop r31 + pop r30 + pop r29 + pop r28 + pop r27 + pop r26 + pop r25 + pop r24 + pop r23 + + pop r19 + pop r18 + pop r17 + pop r16 +.endm + +.macro PUSH_ALL + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r28 + push r29 +.endm + +.macro POP_ALL + pop r29 + pop r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + clr r1 +.endm \ No newline at end of file diff --git a/knot/Implementations/crypto_aead/knot128v1/avr8_speed/config.h b/knot/Implementations/crypto_aead/knot128v1/avr8_speed/config.h new file mode 100644 index 
0000000..41d8080 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v1/avr8_speed/config.h @@ -0,0 +1,131 @@ +#ifndef __CONFIG_H__ +#define __CONFIG_H__ + +#define CRYPTO_AEAD +//#define CRYPTO_HASH + +#define MAX_MESSAGE_LENGTH 128 + +#define STATE_INBITS 256 +/* For CRYPTO_AEAD */ +#define CRYPTO_KEYBITS 128 +/* For CRYPTO_HASH */ +#define CRYPTO_BITS 256 + +#define STATE_INBYTES ((STATE_INBITS + 7) / 8) +#define ROW_INBITS ((STATE_INBITS + 3) / 4) +#define ROW_INBYTES ((ROW_INBITS + 7) / 8) + +/* For CRYPTO_AEAD */ +#define CRYPTO_KEYBYTES ((CRYPTO_KEYBITS + 7) / 8) +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES CRYPTO_KEYBYTES +#define CRYPTO_ABYTES CRYPTO_KEYBYTES +#define CRYPTO_NOOVERLAP 1 + +#define MAX_ASSOCIATED_DATA_LENGTH 32 +#define MAX_CIPHER_LENGTH (MAX_MESSAGE_LENGTH + CRYPTO_ABYTES) + +#define TAG_MATCH 0 +#define TAG_UNMATCH -1 +#define OTHER_FAILURES -2 + +/* For CRYPTO_HASH */ +#define CRYPTO_BYTES ((CRYPTO_BITS + 7) / 8) + + + +#define DOMAIN_BITS 0x80 +#define PAD_BITS 0x01 +#define S384_R192_BITS 0x80 + +#if (STATE_INBITS==256) +#define C1 1 +#define C2 8 +#define C3 25 +#elif (STATE_INBITS==384) +#define C1 1 +#define C2 8 +#define C3 55 +#elif (STATE_INBITS==512) +#define C1 1 +#define C2 16 +#define C3 25 +#else +#error "Not specified state size" +#endif + +#ifdef CRYPTO_AEAD +/* For CRYPTO_AEAD */ +#define KEY_INBITS (CRYPTO_KEYBYTES * 8) +#define KEY_INBYTES (CRYPTO_KEYBYTES) + +#define NONCE_INBITS (CRYPTO_NPUBBYTES * 8) +#define NONCE_INBYTES (CRYPTO_NPUBBYTES) + +#define TAG_INBITS (CRYPTO_ABYTES * 8) +#define TAG_INBYTES (CRYPTO_ABYTES) + +#if (KEY_INBITS==128) && (STATE_INBITS==256) +#define RATE_INBITS 64 +#define NR_0 52 +#define NR_i 28 +#define NR_f 32 +#elif (KEY_INBITS==128) && (STATE_INBITS==384) +#define RATE_INBITS 192 +#define NR_0 76 +#define NR_i 28 +#define NR_f 32 +#elif (KEY_INBITS==192) && (STATE_INBITS==384) +#define RATE_INBITS 96 +#define NR_0 76 +#define NR_i 40 +#define NR_f 44 +#elif 
(KEY_INBITS==256) && (STATE_INBITS==512) +#define RATE_INBITS 128 +#define NR_0 100 +#define NR_i 52 +#define NR_f 56 +#else +#error "Not specified key size and state size" +#endif + +#define RATE_INBYTES ((RATE_INBITS + 7) / 8) +#define SQUEEZE_RATE_INBYTES TAG_INBYTES + +#endif + +#ifdef CRYPTO_HASH +/* For CRYPTO_HASH */ +#define HASH_DIGEST_INBITS (CRYPTO_BYTES * 8) + +#if (HASH_DIGEST_INBITS==256) && (STATE_INBITS==256) +#define HASH_RATE_INBITS 32 +#define HASH_SQUEEZE_RATE_INBITS 128 +#define NR_h 68 +#elif (HASH_DIGEST_INBITS==256) && (STATE_INBITS==384) +#define HASH_RATE_INBITS 128 +#define HASH_SQUEEZE_RATE_INBITS 128 +#define NR_h 80 +#elif (HASH_DIGEST_INBITS==384) && (STATE_INBITS==384) +#define HASH_RATE_INBITS 48 +#define HASH_SQUEEZE_RATE_INBITS 192 +#define NR_h 104 +#elif (HASH_DIGEST_INBITS==512) && (STATE_INBITS==512) +#define HASH_RATE_INBITS 64 +#define HASH_SQUEEZE_RATE_INBITS 256 +#define NR_h 140 +#else +#error "Not specified hash digest size and state size" +#endif + +#define HASH_RATE_INBYTES ((HASH_RATE_INBITS + 7) / 8) +#define HASH_SQUEEZE_RATE_INBYTES ((HASH_SQUEEZE_RATE_INBITS + 7) / 8) + +#endif + +#define TAG_MATCH 0 +#define TAG_UNMATCH -1 +#define OTHER_FAILURES -2 + +#endif \ No newline at end of file diff --git a/knot/Implementations/crypto_aead/knot128v1/avr8_speed/crypto_aead.h b/knot/Implementations/crypto_aead/knot128v1/avr8_speed/crypto_aead.h new file mode 100644 index 0000000..cd820d3 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v1/avr8_speed/crypto_aead.h @@ -0,0 +1,26 @@ +#ifdef __cplusplus +extern "C" { +#endif + +int crypto_aead_encrypt( + unsigned char *c,unsigned long long *clen, + const unsigned char *m,unsigned long long mlen, + const unsigned char *ad,unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k + ); + + +int crypto_aead_decrypt( + unsigned char *m,unsigned long long *outputmlen, + unsigned char *nsec, + const unsigned char 
*c,unsigned long long clen, + const unsigned char *ad,unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k + ); + +#ifdef __cplusplus +} +#endif diff --git a/knot/Implementations/crypto_aead/knot128v1/avr8_speed/encrypt.c b/knot/Implementations/crypto_aead/knot128v1/avr8_speed/encrypt.c new file mode 100644 index 0000000..baf0a3b --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v1/avr8_speed/encrypt.c @@ -0,0 +1,106 @@ +#include +#include +#include +#include +#include "config.h" + +extern void crypto_aead_encrypt_asm( + unsigned char *c, + const unsigned char *m, + unsigned char mlen, + const unsigned char *ad, + unsigned char adlen, + const unsigned char *npub, + const unsigned char *k + ); + +extern int crypto_aead_decrypt_asm( + unsigned char *m, + const unsigned char *c, + unsigned char clen, + const unsigned char *ad, + unsigned char adlen, + const unsigned char *npub, + const unsigned char *k + ); + +extern void crypto_hash_asm( + unsigned char *out, + const unsigned char *in, + unsigned char inlen + ); + + +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k + ) +{ + /* + ... + ... the code for the cipher implementation goes here, + ... generating a ciphertext c[0],c[1],...,c[*clen-1] + ... from a plaintext m[0],m[1],...,m[mlen-1] + ... and associated data ad[0],ad[1],...,ad[adlen-1] + ... and nonce npub[0],npub[1],.. + ... and secret key k[0],k[1],... + ... the implementation shall not use nsec + ... + ... 
return 0; + */ + + (void)nsec; + + crypto_aead_encrypt_asm(c, m, mlen, ad, adlen, npub, k); + + *clen = mlen + TAG_INBYTES; + return 0; +} + + + +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k + ) +{ + /* + ... + ... the code for the AEAD implementation goes here, + ... generating a plaintext m[0],m[1],...,m[*mlen-1] + ... and secret message number nsec[0],nsec[1],... + ... from a ciphertext c[0],c[1],...,c[clen-1] + ... and associated data ad[0],ad[1],...,ad[adlen-1] + ... and nonce number npub[0],npub[1],... + ... and secret key k[0],k[1],... + ... + ... return 0; + */ + unsigned long long mlen_; + unsigned char tag_is_match; + + (void)nsec; + if (clen < CRYPTO_ABYTES) { + return -1; + } + mlen_ = clen - CRYPTO_ABYTES; + + tag_is_match = crypto_aead_decrypt_asm(m, c, mlen_, ad, adlen, npub, k); + + if (tag_is_match != 0) + { + memset(m, 0, (size_t)mlen_); + return -1; + } + + *mlen = mlen_; + return 0; +} \ No newline at end of file diff --git a/knot/Implementations/crypto_aead/knot128v1/avr8_speed/encrypt_core.S b/knot/Implementations/crypto_aead/knot128v1/avr8_speed/encrypt_core.S new file mode 100644 index 0000000..bd74f93 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v1/avr8_speed/encrypt_core.S @@ -0,0 +1,555 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.1 2020 by KNOT Team * +; ********************************************** +; + +; +; ============================================ +; S R A M D E F I N I T I O N S +; ============================================ +; +#include +#include "config.h" + +.section .noinit + SRAM_STATE: .BYTE 0, 0, 0, 0, 0, 0, 
0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#if (STATE_INBYTES > 32) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif +#if (STATE_INBYTES > 48) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif + SRAM_MESSAGE_OUT_ADDR: .BYTE 0, 0 + SRAM_MESSAGE_IN_ADDR: .BYTE 0, 0 + SRAM_MESSAGE_IN_LEN: .BYTE 0, 0 +#ifdef CRYPTO_AEAD +; For CRYPTO_AEAD + SRAM_ASSOCIATED_DATA_ADDR: .BYTE 0, 0 + SRAM_ADLEN: .BYTE 0, 0 + SRAM_NONCE_ADDR: .BYTE 0, 0 + SRAM_KEY_ADDR: .BYTE 0, 0 + + SRAM_ADDITIONAL: + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#if (CRYPTO_ABYTES > 16) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif +#if (CRYPTO_ABYTES > 24) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif + +#endif + +.section .text + +#include "permutation.h" + +; require YH:YL be the address of the current associated data/cipher/message block +; for enc and dec, store ciphertext or plaintext +; require ZH:ZL be the address of the current cipher/message block +.macro XOR_to_State_ENCDEC + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + mov cnt0, rate +XOR_to_State_loop_ENCDEC: + ld tmp0, Y+ ; plaintext/ciphertext + ld tmp1, X ; state + eor tmp1, tmp0 ; ciphertext/plaintext + st Z+, tmp1 ; store ciphertext/plaintext + sbrc AEDH, 1 ; test auth/enc or dec, if AEDH[1] == 0, skip repalce state byte + mov tmp1, tmp0 ; if dec, replace state + st X+, tmp1 ; store state byte + dec cnt0 + brne XOR_to_State_loop_ENCDEC +; YH:YL are now the address of the next associated data block +.endm + +; require YH:YL be the address of the current associated data/cipher/message block +; for enc and dec, store ciphertext or plaintext +; require ZH:ZL be the address of the current cipher/message block +.macro XOR_to_State_AUTH + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + mov cnt0, rate +XOR_to_State_loop_AUTH: + ld tmp0, Y+ ; plaintext/ciphertext + ld tmp1, X ; state + eor tmp1, tmp0 ; ciphertext/plaintext + st X+, tmp1 ; 
store state byte + dec cnt0 + brne XOR_to_State_loop_AUTH +; YH:YL are now the address of the next associated data block +.endm + + + +; require YH:YL pointed to the input data +; require ZH:ZL pointed to the output data +; require cnt0 containes the nubmer of bytes in source data +; require number of bytes in source data less than rate, i.e., 0 <= cnt0 < rate +; +; the 0th bit in AEDH is used to distinguish (auth AD) or (enc/dec M/C): +; AEDH[0] = 0 for (auth AD), AEDH[0] = 1 for (enc/dec M/C) +; the 1th bit in AEDH is used to distinguish (auth AD/enc M) or (dec C): +; AEDH[1] = 0 for (auth AD/enc M), AEDH[1] = 1 for (dec C) +; AEDH = 0b000 for (auth AD) +; AEDH = 0b001 for (enc M) +; AEDH = 0b011 for (dec C) +Pad_XOR_to_State: + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + tst cnt0 + breq XOR_padded_data +XOR_source_data_loop: + ld tmp0, Y+ ; plaintext/ciphertext + ld tmp1, X ; state + eor tmp1, tmp0 ; ciphertext/plaintext + sbrc AEDH, 0 ; test auth or enc/dec, if AEDH[0] == 0, skip store result + st Z+, tmp1 ; store ciphertext/plaintext + sbrc AEDH, 1 ; test auth/enc or dec, if AEDH[1] == 0, skip repalce state byte + mov tmp1, tmp0 ; if dec, replace state + st X+, tmp1 ; store state byte + dec cnt0 + brne XOR_source_data_loop +XOR_padded_data: + ldi tmp0, PAD_BITS + ld tmp1, X + eor tmp1, tmp0 + st X, tmp1 +ret + +AddDomain: + ldi XH, hi8(SRAM_STATE + STATE_INBYTES - 1) + ldi XL, lo8(SRAM_STATE + STATE_INBYTES - 1) + ldi tmp0, DOMAIN_BITS + ld tmp1, X + eor tmp0, tmp1 + st X, tmp0 +ret + +; require ZH:ZL be the address of the destination +EXTRACT_from_State: + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + mov tmp1, rate +EXTRACT_from_State_loop: + ld tmp0, X+ + st Z+, tmp0 + dec tmp1 + brne EXTRACT_from_State_loop +ret + +AUTH: + tst radlen + breq AUTH_end + + cp radlen, rate + brlo auth_ad_padded_block + +auth_ad_loop: + XOR_to_State_AUTH + rcall Permutation + sub radlen, rate + cp radlen, rate + brlo auth_ad_padded_block + rjmp auth_ad_loop + 
+auth_ad_padded_block: + mov cnt0, radlen + rcall Pad_XOR_to_State + rcall Permutation + +AUTH_end: +ret + +#ifdef CRYPTO_AEAD +Initialization: + ldi rn, NR_0 + ldi XL, lo8(SRAM_STATE) + ldi XH, hi8(SRAM_STATE) + + lds YH, SRAM_NONCE_ADDR + lds YL, SRAM_NONCE_ADDR + 1 + ldi cnt0, CRYPTO_NPUBBYTES +load_nonce_loop: + ld tmp0, Y+ + st X+, tmp0 + dec cnt0 + brne load_nonce_loop + + lds YH, SRAM_KEY_ADDR + lds YL, SRAM_KEY_ADDR + 1 + ldi cnt0, CRYPTO_KEYBYTES +load_key_loop: + ld tmp0, Y+ + st X+, tmp0 + dec cnt0 + brne load_key_loop + +#if (STATE_INBITS==384) && (RATE_INBITS==192) + ldi cnt0, (STATE_INBYTES - CRYPTO_NPUBBYTES - CRYPTO_KEYBYTES - 1) + clr tmp0 +empty_state_loop: + st X+, tmp0 + dec cnt0 + brne empty_state_loop + ldi tmp0, S384_R192_BITS + st X+, tmp0 +#endif + + rcall Permutation +ret + +ENC: + tst mclen + breq ENC_end + + cp mclen, rate + brlo enc_padded_block + +enc_loop: + XOR_to_State_ENCDEC + ldi rn, NR_i + rcall Permutation + sub mclen, rate + cp mclen, rate + brlo enc_padded_block + rjmp enc_loop + +enc_padded_block: + mov cnt0, mclen + rcall Pad_XOR_to_State +ENC_end: +ret + +Finalization: + ldi rate, SQUEEZE_RATE_INBYTES + ldi rn, NR_f + rcall Permutation + rcall EXTRACT_from_State +ret + +; void crypto_aead_encrypt_asm( +; unsigned char *c, +; const unsigned char *m, +; unsigned long long mlen, +; const unsigned char *ad, +; unsigned long long adlen, +; const unsigned char *npub, +; const unsigned char *k +; ) +; +; unsigned char *c, is passed in r24:r25 +; const unsigned char *m, is passed in r22:r23 +; unsigned long long mlen, is passed in r20:r21, only LSB (r20) is used +; const unsigned char *ad, is passed in r18:r19 +; unsigned long long adlen, is passed in r16:r17, only LSB (r16) is used +; const unsigned char *npub, is passed in r14:r15 +; const unsigned char *k is passed in r12:r13 +.global crypto_aead_encrypt_asm +crypto_aead_encrypt_asm: + PUSH_ALL + ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR) + ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR) + st X+, 
r25 ;store cipher address in SRAM_MESSAGE_OUT_ADDR + st X+, r24 + st X+, r23 ;store message address in SRAM_MESSAGE_IN_ADDR + st X+, r22 + st X+, r21 ;store message length in SRAM_MESSAGE_IN_LEN + st X+, r20 + st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR + st X+, r18 + st X+, r17 ;store associated data length in SRAM_ADLEN + st X+, r16 + st X+, r15 ;store nonce address in SRAM_NONCE_ADDR + st X+, r14 + st X+, r13 ;store key address in SRAM_KEY_ADDR + st X+, r12 + mov radlen, r16 + mov mclen, r20 + + rcall Initialization + + ldi rn, NR_i + ldi rate, RATE_INBYTES + ldi AEDH, 0b000 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C) + lds YH, SRAM_ASSOCIATED_DATA_ADDR + lds YL, SRAM_ASSOCIATED_DATA_ADDR + 1 + rcall AUTH + rcall AddDomain + ldi AEDH, 0b001 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C) + lds YH, SRAM_MESSAGE_IN_ADDR + lds YL, SRAM_MESSAGE_IN_ADDR + 1 + lds ZH, SRAM_MESSAGE_OUT_ADDR + lds ZL, SRAM_MESSAGE_OUT_ADDR + 1 + rcall ENC + rcall Finalization + POP_ALL +ret + +; int crypto_aead_decrypt_asm( +; unsigned char *m, +; const unsigned char *c, +; unsigned long long clen, +; const unsigned char *ad, +; unsigned long long adlen, +; const unsigned char *npub, +; const unsigned char *k +; ) +; +; unsigned char *m, is passed in r24:r25 +; const unsigned char *c, is passed in r22:r23 +; unsigned long long clen, is passed in r20:r21, only LSB (r20) is used +; const unsigned char *ad, is passed in r18:r19 +; unsigned long long adlen, is passed in r16:r17, only LSB (r16) is used +; const unsigned char *npub, is passed in r14:r15 +; const unsigned char *k is passed in r12:r13 +.global crypto_aead_decrypt_asm +crypto_aead_decrypt_asm: + PUSH_ALL + ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR) + ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR) + st X+, r25 ;store message address in SRAM_MESSAGE_OUT_ADDR + st X+, r24 + st X+, r23 ;store cipher address in SRAM_MESSAGE_IN_ADDR + st X+, r22 + st X+, r21 
;store cipher length in SRAM_MESSAGE_IN_LEN + st X+, r20 + st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR + st X+, r18 + st X+, r17 ;store associated data length in SRAM_ADLEN + st X+, r16 + st X+, r15 ;store nonce address in SRAM_NONCE_ADDR + st X+, r14 + st X+, r13 ;store key address in SRAM_KEY_ADDR + st X+, r12 + mov radlen, r16 + mov mclen, r20 + + rcall Initialization + + ldi rn, NR_i + ldi rate, RATE_INBYTES + ldi AEDH, 0b000 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C) + lds YH, SRAM_ASSOCIATED_DATA_ADDR + lds YL, SRAM_ASSOCIATED_DATA_ADDR + 1 + rcall AUTH + rcall AddDomain + ldi AEDH, 0b011 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C) + lds YH, SRAM_MESSAGE_IN_ADDR + lds YL, SRAM_MESSAGE_IN_ADDR + 1 + lds ZH, SRAM_MESSAGE_OUT_ADDR + lds ZL, SRAM_MESSAGE_OUT_ADDR + 1 + rcall ENC + + ldi ZH, hi8(SRAM_ADDITIONAL) + ldi ZL, lo8(SRAM_ADDITIONAL) + rcall Finalization + + sbiw ZL, CRYPTO_ABYTES + ldi cnt0, CRYPTO_ABYTES +compare_tag: + ld tmp0, Z+ + ld tmp1, Y+ + cp tmp0, tmp1 + brne return_tag_not_match + dec cnt0 + brne compare_tag + rjmp return_tag_match + +return_tag_not_match: + ldi r25, 0xFF + ldi r24, 0xFF + rjmp crypto_aead_decrypt_end + +return_tag_match: + clr r25 + clr r24 +crypto_aead_decrypt_end: + POP_ALL +ret + +; #ifdef CRYPTO_AEAD +#endif + + +#ifdef CRYPTO_HASH + +; void crypto_hash_asm( +; unsigned char *out, +; const unsigned char *in, +; unsigned long long inlen +; ) +; +; unsigned char *out, is passed in r24:r25 +; const unsigned char *in, is passed in r22:r23 +; unsigned long long inlen, is passed in r20:r21, only LSB (r20) is used +.global crypto_hash_asm +crypto_hash_asm: + PUSH_ALL + ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR) + ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR) + st X+, r25 ;store message address in SRAM_MESSAGE_OUT_ADDR + st X+, r24 + st X+, r23 ;store cipher address in SRAM_MESSAGE_IN_ADDR + st X+, r22 + st X+, r21 ;store cipher length in 
SRAM_MESSAGE_IN_LEN + st X+, r20 + mov mclen, r20 + + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) +#if (STATE_INBITS==384) && (HASH_RATE_INBITS==128) + ldi cnt0, STATE_INBYTES - 1 +#else + ldi cnt0, STATE_INBYTES +#endif + clr tmp0 +zero_state: + st X+, tmp0 + dec cnt0 + brne zero_state + +#if (STATE_INBITS==384) && (HASH_RATE_INBITS==128) + ldi tmp0, S384_R192_BITS + st X+, tmp0 +#endif + + ldi rn, NR_h + ldi AEDH, 0b100 + +HASH_ABSORBING: + mov radlen, mclen + tst radlen + breq EMPTY_M + ldi rate, HASH_RATE_INBYTES + lds YH, SRAM_MESSAGE_IN_ADDR + lds YL, SRAM_MESSAGE_IN_ADDR + 1 + rcall AUTH + rjmp HASH_SQUEEZING + +EMPTY_M: + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + ldi tmp0, PAD_BITS + ld tmp1, X + eor tmp1, tmp0 + st X, tmp1 + rcall Permutation + +HASH_SQUEEZING: + ldi rate, HASH_SQUEEZE_RATE_INBYTES + lds ZH, SRAM_MESSAGE_OUT_ADDR + lds ZL, SRAM_MESSAGE_OUT_ADDR + 1 + ldi tcnt, CRYPTO_BYTES +SQUEEZING_loop: + rcall EXTRACT_from_State + subi tcnt, HASH_SQUEEZE_RATE_INBYTES + breq HASH_SQUEEZING_end + rcall Permutation + rjmp SQUEEZING_loop +HASH_SQUEEZING_end: + POP_ALL +ret + +#endif + + +; Byte Order In AVR 8: +; KNOT-AEAD(128, 256, 64): +; N[ 0] AEAD_State[ 0] | Message[ 0] Perm_row_0[0] 0 Tag[ 0] +; N[ 1] AEAD_State[ 1] | Message[ 1] Perm_row_0[1] 0 Tag[ 1] +; N[ 2] AEAD_State[ 2] | Message[ 2] Perm_row_0[2] 0 Tag[ 2] +; N[ 3] AEAD_State[ 3] | Message[ 3] Perm_row_0[3] 0 Tag[ 3] +; N[ 4] AEAD_State[ 4] | Message[ 4] 0x01 Perm_row_0[4] 0 Tag[ 4] +; N[ 5] AEAD_State[ 5] | Message[ 5] 0x00 Perm_row_0[5] 0 Tag[ 5] +; N[ 6] AEAD_State[ 6] | Message[ 6] 0x00 Perm_row_0[6] 0 Tag[ 6] +; N[ 7] AEAD_State[ 7] | Message[ 7] 0x00 Perm_row_0[7] <<< 0 Tag[ 7] +; N[ 8] AEAD_State[ 8] | Perm_row_1[0] 1 +; N[ 9] AEAD_State[ 9] | Perm_row_1[1] 1 +; N[10] AEAD_State[10] | Perm_row_1[2] 1 +; N[11] AEAD_State[11] | Perm_row_1[3] 1 +; N[12] AEAD_State[12] | Perm_row_1[4] 1 +; N[13] AEAD_State[13] | Perm_row_1[5] 1 +; N[14] AEAD_State[14] | Perm_row_1[6] 1 +; 
N[15] AEAD_State[15] | Perm_row_1[7] <<< 1 +; K[ 0] AEAD_State[16] | Perm_row_2[0] 8 +; K[ 1] AEAD_State[17] | Perm_row_2[1] 8 +; K[ 2] AEAD_State[18] | Perm_row_2[2] 8 +; K[ 3] AEAD_State[19] | Perm_row_2[3] 8 +; K[ 4] AEAD_State[20] | Perm_row_2[4] 8 +; K[ 5] AEAD_State[21] | Perm_row_2[5] 8 +; K[ 6] AEAD_State[22] | Perm_row_2[6] 8 +; K[ 7] AEAD_State[23] | Perm_row_2[7] <<< 8 +; K[ 8] AEAD_State[24] | Perm_row_3[0] 25 +; K[ 9] AEAD_State[25] | Perm_row_3[1] 25 +; K[10] AEAD_State[26] | Perm_row_3[2] 25 +; K[11] AEAD_State[27] | Perm_row_3[3] 25 +; K[12] AEAD_State[28] | Perm_row_3[4] 25 +; K[13] AEAD_State[29] | Perm_row_3[5] 25 +; K[14] AEAD_State[30] | Perm_row_3[6] 25 +; K[15] AEAD_State[31] | ^0x80 Perm_row_3[7] <<< 25 +; +; +; KNOT-AEAD(128, 384, 192): +; Initalization +; N[ 0] AEAD_State[ 0] | Message[ 0] Perm_row_0[ 0] 0 Tag[ 0] +; N[ 1] AEAD_State[ 1] | Message[ 1] Perm_row_0[ 1] 0 Tag[ 1] +; N[ 2] AEAD_State[ 2] | Message[ 2] Perm_row_0[ 2] 0 Tag[ 2] +; N[ 3] AEAD_State[ 3] | Message[ 3] Perm_row_0[ 3] 0 Tag[ 3] +; N[ 4] AEAD_State[ 4] | Message[ 4] 0x01 Perm_row_0[ 4] 0 Tag[ 4] +; N[ 5] AEAD_State[ 5] | Message[ 5] 0x00 Perm_row_0[ 5] 0 Tag[ 5] +; N[ 6] AEAD_State[ 6] | Message[ 6] 0x00 Perm_row_0[ 6] 0 Tag[ 6] +; N[ 7] AEAD_State[ 7] | Message[ 7] 0x00 Perm_row_0[ 7] 0 Tag[ 7] +; N[ 8] AEAD_State[ 8] | Message[ 8] 0x00 Perm_row_0[ 8] 0 Tag[ 8] +; N[ 9] AEAD_State[ 9] | Message[ 9] 0x00 Perm_row_0[ 9] 0 Tag[ 9] +; N[10] AEAD_State[10] | Message[10] 0x00 Perm_row_0[10] 0 Tag[10] +; N[11] AEAD_State[11] | Message[11] 0x00 Perm_row_0[11] <<< 0 Tag[11] +; N[12] AEAD_State[12] | Message[12] 0x00 Perm_row_1[ 0] 1 Tag[12] +; N[13] AEAD_State[13] | Message[13] 0x00 Perm_row_1[ 1] 1 Tag[13] +; N[14] AEAD_State[14] | Message[14] 0x00 Perm_row_1[ 2] 1 Tag[14] +; N[15] AEAD_State[15] | Message[15] 0x00 Perm_row_1[ 3] 1 Tag[15] +; K[ 0] AEAD_State[16] | Message[16] 0x00 Perm_row_1[ 4] 1 +; K[ 1] AEAD_State[17] | Message[17] 0x00 Perm_row_1[ 5] 1 +; K[ 2] 
AEAD_State[18] | Message[18] 0x00 Perm_row_1[ 6] 1 +; K[ 3] AEAD_State[19] | Message[19] 0x00 Perm_row_1[ 7] 1 +; K[ 4] AEAD_State[20] | Message[20] 0x00 Perm_row_1[ 8] 1 +; K[ 5] AEAD_State[21] | Message[21] 0x00 Perm_row_1[ 9] 1 +; K[ 6] AEAD_State[22] | Message[22] 0x00 Perm_row_1[10] 1 +; K[ 7] AEAD_State[23] | Message[23] 0x00 Perm_row_1[11] <<< 1 +; K[ 8] AEAD_State[24] | Perm_row_2[ 0] 8 +; K[ 9] AEAD_State[25] | Perm_row_2[ 1] 8 +; K[10] AEAD_State[26] | Perm_row_2[ 2] 8 +; K[11] AEAD_State[27] | Perm_row_2[ 3] 8 +; K[12] AEAD_State[28] | Perm_row_2[ 4] 8 +; K[13] AEAD_State[29] | Perm_row_2[ 5] 8 +; K[14] AEAD_State[30] | Perm_row_2[ 6] 8 +; K[15] AEAD_State[31] | Perm_row_2[ 7] 8 +; 0x00 AEAD_State[32] | Perm_row_2[ 8] 8 +; 0x00 AEAD_State[33] | Perm_row_2[ 9] 8 +; 0x00 AEAD_State[34] | Perm_row_2[10] 8 +; 0x00 AEAD_State[35] | Perm_row_2[11] <<< 8 +; 0x00 AEAD_State[36] | Perm_row_3[ 0] 55 +; 0x00 AEAD_State[37] | Perm_row_3[ 1] 55 +; 0x00 AEAD_State[38] | Perm_row_3[ 2] 55 +; 0x00 AEAD_State[39] | Perm_row_3[ 3] 55 +; 0x00 AEAD_State[40] | Perm_row_3[ 4] 55 +; 0x00 AEAD_State[41] | Perm_row_3[ 5] 55 +; 0x00 AEAD_State[42] | Perm_row_3[ 6] 55 +; 0x00 AEAD_State[43] | Perm_row_3[ 7] 55 +; 0x00 AEAD_State[44] | Perm_row_3[ 8] 55 +; 0x00 AEAD_State[45] | Perm_row_3[ 9] 55 +; 0x00 AEAD_State[46] | Perm_row_3[10] 55 +; 0x00 ^0x80 AEAD_State[47] | ^0x80 Perm_row_3[11] <<< 55 diff --git a/knot/Implementations/crypto_aead/knot128v1/avr8_speed/knot256.h b/knot/Implementations/crypto_aead/knot128v1/avr8_speed/knot256.h new file mode 100644 index 0000000..f99f68b --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v1/avr8_speed/knot256.h @@ -0,0 +1,306 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.1 2020 by KNOT Team * +; 
********************************************** +; +#define x10 r0 +#define x11 r1 +#define x12 r2 +#define x13 r3 +#define x14 r4 +#define x15 r5 +#define x16 r6 +#define x17 r7 + +; an intentionally arrangement of registers to facilitate movw +#define x20 r8 +#define x21 r10 +#define x22 r12 +#define x23 r14 +#define x24 r9 +#define x25 r11 +#define x26 r13 +#define x27 r15 + +; an intentionally arrangement of registers to facilitate movw +#define x30 r16 +#define x35 r18 +#define x32 r20 +#define x37 r22 +#define x34 r17 +#define x31 r19 +#define x36 r21 +#define x33 r23 + +#define t0j r24 +#define t1j r25 +#define x0j r27 + +#include "assist.h" + +.macro Sbox i0, i1, i2, i3 + mov t0j, \i1 + com \i0 + and \i1, \i0 + eor \i1, \i2 + or \i2, t0j + eor \i0, \i3 + eor \i2, \i0 + eor t0j, \i3 + and \i0, \i1 + eor \i3, \i1 + eor \i0, t0j + and t0j, \i2 + eor \i1, t0j +.endm + +Permutation: + PUSH_CONFLICT + mov rcnt, rn + + ldi YH, hi8(SRAM_STATE + ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + ROW_INBYTES) + ld x10, Y+ + ld x11, Y+ + ld x12, Y+ + ld x13, Y+ + ld x14, Y+ + ld x15, Y+ + ld x16, Y+ + ld x17, Y+ + ld x20, Y+ + ld x21, Y+ + ld x22, Y+ + ld x23, Y+ + ld x24, Y+ + ld x25, Y+ + ld x26, Y+ + ld x27, Y+ + ld x30, Y+ + ld x31, Y+ + ld x32, Y+ + ld x33, Y+ + ld x34, Y+ + ld x35, Y+ + ld x36, Y+ + ld x37, Y+ + +#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) + sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[2] = 1 for HASH + rjmp For_Hash +For_AEAD: + ldi ZL, lo8(RC_LFSR6) + ldi ZH, hi8(RC_LFSR6) + rjmp round_loop_start +For_Hash: + ldi ZL, lo8(RC_LFSR7) + ldi ZH, hi8(RC_LFSR7) +#elif defined(CRYPTO_AEAD) + ldi ZL, lo8(RC_LFSR6) + ldi ZH, hi8(RC_LFSR6) +#else + ldi ZL, lo8(RC_LFSR7) + ldi ZH, hi8(RC_LFSR7) +#endif + +round_loop_start: + ; AddRC + lpm t0j, Z+ + ldi YH, hi8(SRAM_STATE) + ldi YL, lo8(SRAM_STATE) + ld x0j, Y + eor x0j, t0j + + ; SubColumns + Sbox x0j, x10, x20, x30 + st Y+, x0j + ld x0j, Y + Sbox x0j, x11, x21, x31 + st Y+, x0j + ld x0j, Y + Sbox x0j, x12, x22, 
x32 + st Y+, x0j + ld x0j, Y + Sbox x0j, x13, x23, x33 + st Y+, x0j + ld x0j, Y + Sbox x0j, x14, x24, x34 + st Y+, x0j + ld x0j, Y + Sbox x0j, x15, x25, x35 + st Y+, x0j + ld x0j, Y + Sbox x0j, x16, x26, x36 + st Y+, x0j + ld x0j, Y + Sbox x0j, x17, x27, x37 + st Y, x0j + + ; ShiftRows + ; <<< 1 + mov t0j, x17 + rol t0j + rol x10 + rol x11 + rol x12 + rol x13 + rol x14 + rol x15 + rol x16 + rol x17 + + ; <<< 8 + ; 7 6 5 4 3 2 1 0 => 6 5 4 3 2 1 0 7 + ;mov t0j, x27 + ;mov x27, x26 + ;mov x26, x25 + ;mov x25, x24 + ;mov x24, x23 + ;mov x23, x22 + ;mov x22, x21 + ;mov x21, x20 + ;mov x20, t0j + ; an intentionally arrangement of registers to facilitate movw + movw t0j, x23 ; t1j:t0j <= x27:x23 + movw x23, x22 ; x27:x23 <= x26:x22 + movw x22, x21 ; x26:x22 <= x25:x21 + movw x21, x20 ; x25:x21 <= x24:x20 + mov x20, t1j ; x20 <= t1j + mov x24, t0j ; x24 <= t0j + + ; <<< 1 + mov t0j, x37 + rol t0j + rol x30 + rol x31 + rol x32 + rol x33 + rol x34 + rol x35 + rol x36 + rol x37 + ; <<< 24 + ; 7 6 5 4 3 2 1 0 => 4 3 2 1 0 7 6 5 + ;mov t0j, x30 + ;mov x30, x35 + ;mov x35, x32 + ;mov x32, x37 + ;mov x37, x34 + ;mov x34, x31 + ;mov x31, x36 + ;mov x36, x33 + ;mov x33, t0j + ; an intentionally arrangement of registers to facilitate movw + ;x30 r16 + ;x35 r18 + ;x32 r20 + ;x37 r22 + ;x34 r17 + ;x31 r19 + ;x36 r21 + ;x33 r23 + movw t0j, x30 ; t1j:t0j <= x34:x30 + movw x30, x35 ; x34:x30 <= x31:x35 + movw x35, x32 ; x31:x35 <= x36:x32 + movw x32, x37 ; x36:x32 <= x33:x37 + mov x37, t1j ; x37 <= x34 + mov x33, t0j ; x33 <= x30 + + dec rcnt + breq round_loop_end + jmp round_loop_start + +round_loop_end: + ldi YH, hi8(SRAM_STATE + ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + ROW_INBYTES) + st Y+, x10 + st Y+, x11 + st Y+, x12 + st Y+, x13 + st Y+, x14 + st Y+, x15 + st Y+, x16 + st Y+, x17 + st Y+, x20 + st Y+, x21 + st Y+, x22 + st Y+, x23 + st Y+, x24 + st Y+, x25 + st Y+, x26 + st Y+, x27 + st Y+, x30 + st Y+, x31 + st Y+, x32 + st Y+, x33 + st Y+, x34 + st Y+, x35 + st Y+, x36 + st Y+, 
x37 + + POP_CONFLICT +ret + + +.section .text +#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) +RC_LFSR6: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x21, 0x03, 0x06 +.byte 0x0c, 0x18, 0x31, 0x22, 0x05, 0x0a, 0x14, 0x29 +.byte 0x13, 0x27, 0x0f, 0x1e, 0x3d, 0x3a, 0x34, 0x28 +.byte 0x11, 0x23, 0x07, 0x0e, 0x1c, 0x39, 0x32, 0x24 +.byte 0x09, 0x12, 0x25, 0x0b, 0x16, 0x2d, 0x1b, 0x37 +.byte 0x2e, 0x1d, 0x3b, 0x36, 0x2c, 0x19, 0x33, 0x26 +.byte 0x0d, 0x1a, 0x35, 0x2a, 0x15, 0x2b, 0x17, 0x2f +.byte 0x1f, 0x3f, 0x3e, 0x3c, 0x38, 0x30, 0x20, 0x00 +RC_LFSR7: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03 +.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a +.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c +.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b +.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a +.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f +.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43 +.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09 +.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36 +.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37 +.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31 +.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25 +.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c +.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a +.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f +.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00 +#elif defined(CRYPTO_AEAD) +RC_LFSR6: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x21, 0x03, 0x06 +.byte 0x0c, 0x18, 0x31, 0x22, 0x05, 0x0a, 0x14, 0x29 +.byte 0x13, 0x27, 0x0f, 0x1e, 0x3d, 0x3a, 0x34, 0x28 +.byte 0x11, 0x23, 0x07, 0x0e, 0x1c, 0x39, 0x32, 0x24 +.byte 0x09, 0x12, 0x25, 0x0b, 0x16, 0x2d, 0x1b, 0x37 +.byte 0x2e, 0x1d, 0x3b, 0x36, 0x2c, 0x19, 0x33, 0x26 +.byte 0x0d, 0x1a, 0x35, 0x2a, 0x15, 0x2b, 0x17, 0x2f +.byte 0x1f, 0x3f, 0x3e, 0x3c, 0x38, 0x30, 0x20, 0x00 +#else +RC_LFSR7: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03 +.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 
0x05, 0x0a +.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c +.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b +.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a +.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f +.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43 +.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09 +.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36 +.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37 +.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31 +.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25 +.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c +.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a +.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f +.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00 +#endif \ No newline at end of file diff --git a/knot/Implementations/crypto_aead/knot128v1/avr8_speed/knot384.h b/knot/Implementations/crypto_aead/knot128v1/avr8_speed/knot384.h new file mode 100644 index 0000000..0b3dd75 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v1/avr8_speed/knot384.h @@ -0,0 +1,261 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.1 2020 by KNOT Team * +; ********************************************** +; + +; an intentionally arrangement of registers to facilitate movw +#define x20 r0 +#define x21 r2 +#define x22 r4 +#define x23 r6 +#define x24 r8 +#define x25 r10 +#define x26 r1 +#define x27 r3 +#define x28 r5 +#define x29 r7 +#define x2a r9 +#define x2b r11 + +; an intentionally arrangement of registers to facilitate movw +#define x30 r22 +#define x35 r20 +#define x3a r18 +#define x33 r16 +#define x38 r14 +#define x31 r12 +#define x36 r23 +#define x3b r21 +#define x34 r19 +#define x39 r17 +#define x32 r15 +#define x37 r13 + +#define t0j r24 +#define t1j r25 +#define x0j r25 +#define 
x1j r27 + +#include "assist.h" + +.macro Sbox i0, i1, i2, i3 + ldi t0j, 0xFF + eor \i0, t0j + mov t0j, \i1 + and \i1, \i0 + eor \i1, \i2 + or \i2, t0j + eor \i0, \i3 + eor \i2, \i0 + eor t0j, \i3 + and \i0, \i1 + eor \i3, \i1 + eor \i0, t0j + and t0j, \i2 + eor \i1, t0j +.endm + +.macro OneColumn i0, i1, i2, i3 + ld \i0, Y + ldd \i1, Y + ROW_INBYTES + Sbox \i0, \i1, \i2, \i3 + st Y+, \i0 + rol \i1 ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES -1, \i1 +.endm + +Permutation: + PUSH_CONFLICT + mov rcnt, rn + + ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES) + ld x20, Y+ + ld x21, Y+ + ld x22, Y+ + ld x23, Y+ + ld x24, Y+ + ld x25, Y+ + ld x26, Y+ + ld x27, Y+ + ld x28, Y+ + ld x29, Y+ + ld x2a, Y+ + ld x2b, Y+ + ld x30, Y+ + ld x31, Y+ + ld x32, Y+ + ld x33, Y+ + ld x34, Y+ + ld x35, Y+ + ld x36, Y+ + ld x37, Y+ + ld x38, Y+ + ld x39, Y+ + ld x3a, Y+ + ld x3b, Y+ + + ldi ZL, lo8(RC_LFSR7) + ldi ZH, hi8(RC_LFSR7) + +round_loop_start: + ; AddRC + lpm t0j, Z+ + ldi YH, hi8(SRAM_STATE) + ldi YL, lo8(SRAM_STATE) + ld x0j, Y + eor x0j, t0j + + ldd x1j, Y + ROW_INBYTES + Sbox x0j, x1j, x20, x30 + st Y+, x0j + lsl x1j ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES -1, x1j + + OneColumn x0j, x1j, x21, x31 + OneColumn x0j, x1j, x22, x32 + OneColumn x0j, x1j, x23, x33 + OneColumn x0j, x1j, x24, x34 + OneColumn x0j, x1j, x25, x35 + OneColumn x0j, x1j, x26, x36 + OneColumn x0j, x1j, x27, x37 + OneColumn x0j, x1j, x28, x38 + OneColumn x0j, x1j, x29, x39 + OneColumn x0j, x1j, x2a, x3a + OneColumn x0j, x1j, x2b, x3b + + ld x1j, Y + eor t0j, t0j + adc x1j, t0j + st Y, x1j + + ; b a 9 8 7 6 5 4 3 2 1 0 + ; -- -- -- -- -- -- -- -- -- -- -- x- 0 + ; -- -- -- -- -- -- -- -- -- -- -- x' 0 + ; -- -- -- -- -- -- -- -- -- -- x- -- 1 + ; -- -- -- -- x' -- -- -- -- -- -- -- 7 + ; 4 3 2 1 0 b a 9 8 7 6 5 + + ; ShiftRows -- the last two rows + ; <<< 8 + ; b a 9 8 7 6 5 4 3 2 1 0 => a 9 8 7 6 5 4 3 2 1 0 b + movw t0j, x25 ; t1j:t0j <= x2b:x25 + movw x25, 
x24 ; x2b:x25 <= x2a:x24 + movw x24, x23 ; x2a:x24 <= x29:x23 + movw x23, x22 ; x29:x23 <= x28:x22 + movw x22, x21 ; x28:x22 <= x27:x21 + movw x21, x20 ; x27:x21 <= x26:x20 + mov x26, t0j ; x26 <= x25 + mov x20, t1j ; x20 <= x2b + + ; >>> 1 + mov t0j, x3b + ror t0j + ror x3a + ror x39 + ror x38 + ror x37 + ror x36 + ror x35 + ror x34 + ror x33 + ror x32 + ror x31 + ror x30 + ror x3b + ; <<< 56 + ; b a 9 8 7 6 5 4 3 2 1 0 => 4 3 2 1 0 b a 9 8 7 6 5 + ; mov x3j, x30 + ; mov x30, x35 + ; mov x35, x3a + ; mov x3a, x33 + ; mov x33, x38 + ; mov x38, x31 + ; mov x31, x36 + ; mov x36, x3b + ; mov x3b, x34 + ; mov x34, x39 + ; mov x39, x32 + ; mov x32, x37 + ; mov x37, x3j + ; an intentionally arrangement of registers to facilitate movw + ; x30 r22 + ; x35 r20 + ; x3a r18 + ; x33 r16 + ; x38 r14 + ; x31 r12 + ; x36 r23 + ; x3b r21 + ; x34 r19 + ; x39 r17 + ; x32 r15 + ; x37 r13 + movw t0j, x30 ; t1j:t0j <= x36:x30 + movw x30, x35 ; x36:x30 <= x3b:x35 + movw x35, x3a ; x3b:x35 <= x34:x3a + movw x3a, x33 ; x34:x3a <= x39:x33 + movw x33, x38 ; x39:x33 <= x32:x38 + movw x38, x31 ; x32:x38 <= x37:x31 + mov x31, t1j ; x31 <= x36 + mov x37, t0j ; x37 <= x30 + + dec rcnt + breq round_loop_end + jmp round_loop_start + +round_loop_end: + + ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES) + st Y+, x20 + st Y+, x21 + st Y+, x22 + st Y+, x23 + st Y+, x24 + st Y+, x25 + st Y+, x26 + st Y+, x27 + st Y+, x28 + st Y+, x29 + st Y+, x2a + st Y+, x2b + st Y+, x30 + st Y+, x31 + st Y+, x32 + st Y+, x33 + st Y+, x34 + st Y+, x35 + st Y+, x36 + st Y+, x37 + st Y+, x38 + st Y+, x39 + st Y+, x3a + st Y+, x3b + + POP_CONFLICT +ret + +RC_LFSR7: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03 +.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a +.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c +.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b +.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a +.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f 
+.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43 +.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09 +.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36 +.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37 +.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31 +.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25 +.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c +.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a +.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f +.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00 \ No newline at end of file diff --git a/knot/Implementations/crypto_aead/knot128v1/avr8_speed/knot512.h b/knot/Implementations/crypto_aead/knot128v1/avr8_speed/knot512.h new file mode 100644 index 0000000..b0e4319 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v1/avr8_speed/knot512.h @@ -0,0 +1,435 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.1 2020 by KNOT Team * +; ********************************************** +; +#define x20 r0 +#define x22 r2 +#define x24 r4 +#define x26 r6 +#define x28 r1 +#define x2a r3 +#define x2c r5 +#define x2e r7 + +#define x30 r8 +#define x3d r10 +#define x3a r12 +#define x37 r14 +#define x34 r16 +#define x31 r18 +#define x3e r20 +#define x3b r22 +#define x38 r9 +#define x35 r11 +#define x32 r13 +#define x3f r15 +#define x3c r17 +#define x39 r19 +#define x36 r21 +#define x33 r23 + +#define t0j r24 +#define t1j r25 +#define x0j r25 +#define x1j r27 +#define x2j r26 + +#include "assist.h" + +.macro Sbox i0, i1, i2, i3 + ldi t0j, 0xFF + eor \i0, t0j + mov t0j, \i1 + and \i1, \i0 + eor \i1, \i2 + or \i2, t0j + eor \i0, \i3 + eor \i2, \i0 + eor t0j, \i3 + and \i0, \i1 + eor \i3, \i1 + eor \i0, t0j + and t0j, \i2 + eor \i1, t0j +.endm + +.macro TwoColumns i2_e, i3_e, i3_o + ; column 2i + ld x0j, Y + ldd 
x1j, Y + ROW_INBYTES + Sbox x0j, x1j, \i2_e, \i3_e + st Y+, x0j + rol x1j ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES - 1, x1j + + ; column 2i+1 + ld x0j, Y + ldd x1j, Y + ROW_INBYTES + Sbox x0j, x1j, x2j, \i3_o + st Y+, x0j + rol x1j ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES - 1, x1j + ldd t0j, Y + 2 * ROW_INBYTES + 1 + std Y + 2 * ROW_INBYTES + 1, x2j + mov x2j, t0j +.endm + +Permutation: + PUSH_CONFLICT + mov rcnt, rn + push rcnt + + ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES) + + ldd x20, Y + 0x00 + ldd x22, Y + 0x02 + ldd x24, Y + 0x04 + ldd x26, Y + 0x06 + ldd x28, Y + 0x08 + ldd x2a, Y + 0x0a + ldd x2c, Y + 0x0c + ldd x2e, Y + 0x0e + + adiw YL, ROW_INBYTES + + ld x30, Y+ + ld x31, Y+ + ld x32, Y+ + ld x33, Y+ + ld x34, Y+ + ld x35, Y+ + ld x36, Y+ + ld x37, Y+ + ld x38, Y+ + ld x39, Y+ + ld x3a, Y+ + ld x3b, Y+ + ld x3c, Y+ + ld x3d, Y+ + ld x3e, Y+ + ld x3f, Y+ + +#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) + sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[2] = 1 for HASH + rjmp For_Hash +For_AEAD: + ldi ZL, lo8(RC_LFSR7) + ldi ZH, hi8(RC_LFSR7) + rjmp round_loop_start +For_Hash: + ldi ZL, lo8(RC_LFSR8) + ldi ZH, hi8(RC_LFSR8) +#elif defined(CRYPTO_AEAD) + ldi ZL, lo8(RC_LFSR7) + ldi ZH, hi8(RC_LFSR7) +#else + ldi ZL, lo8(RC_LFSR8) + ldi ZH, hi8(RC_LFSR8) +#endif + + +round_loop_start: + ; AddRC + lpm t0j, Z+ + ldi YH, hi8(SRAM_STATE) + ldi YL, lo8(SRAM_STATE) + + ; column 0 + ld x0j, Y + eor x0j, t0j + ldd x1j, Y + ROW_INBYTES + Sbox x0j, x1j, x20, x30 + st Y+, x0j + lsl x1j ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES - 1, x1j + + ; column 1 + ld x0j, Y + ldd x1j, Y + ROW_INBYTES + ldd x2j, Y + 2 * ROW_INBYTES + Sbox x0j, x1j, x2j, x31 + st Y+, x0j + rol x1j ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES - 1, x1j + ldd t0j, Y + 2 * ROW_INBYTES + 1 + std Y + 2 * ROW_INBYTES + 1, x2j + mov x2j, t0j + + ; column 2, 3 + TwoColumns x22, x32, x33 + ; column 4, 5 + TwoColumns x24, x34, x35 + ; column 
6, 7 + TwoColumns x26, x36, x37 + ; column 8, 9 + TwoColumns x28, x38, x39 + ; column 10, 11 + TwoColumns x2a, x3a, x3b + ; column 12, 13 + TwoColumns x2c, x3c, x3d + + ; column 14 + ld x0j, Y + ldd x1j, Y + ROW_INBYTES + Sbox x0j, x1j, x2e, x3e + st Y+, x0j + rol x1j ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES - 1, x1j + + ; column 15 + ld x0j, Y + ldd x1j, Y + ROW_INBYTES + Sbox x0j, x1j, x2j, x3f + st Y+, x0j + rol x1j ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES - 1, x1j + + ld x1j, Y + eor t0j, t0j + adc x1j, t0j + st Y, x1j + std Y + ROW_INBYTES + 1, x2j + + ; f e d c b a 9 8 7 6 5 4 3 2 1 0 + ; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x- 0 + ; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x' 0 + ; -- -- -- -- -- -- -- -- -- -- -- -- -- x- -- -- 2 + ; -- -- -- -- -- -- -- -- -- -- -- -- x' -- -- -- 3 + ; c b a 9 8 7 6 5 4 3 2 1 0 f e d + ; x2e x2c x2a x28 x26 x24 x22 x20 => x2c x2a x28 x26 x24 x22 x20 x2e + ;mov t0j, x2e + ;mov x2e, x2c + ;mov x2c, x2a + ;mov x2a, x28 + ;mov x28, x26 + ;mov x26, x24 + ;mov x24, x22 + ;mov x22, x20 + ;mov x20, t0j + ; an intentionally arrangement of registers to facilitate movw + movw t0j, x26 ; t1j:t0j <= x2e:x26 + movw x26, x24 ; x2e:x26 <= x2c:x24 + movw x24, x22 ; x2c:x24 <= x2a:x22 + movw x22, x20 ; x2a:x22 <= x28:x20 + mov x20, t1j ; x20 <= t1j + mov x28, t0j ; x28 <= t0j + + ; <<< 1 + mov t0j, x3f + rol t0j + rol x30 + rol x31 + rol x32 + rol x33 + rol x34 + rol x35 + rol x36 + rol x37 + rol x38 + rol x39 + rol x3a + rol x3b + rol x3c + rol x3d + rol x3e + rol x3f + ; <<< 24 + ; f e d c b a 9 8 7 6 5 4 3 2 1 0 => + ; c b a 9 8 7 6 5 4 3 2 1 0 f e d + ; mov x3j, x30 + ; mov x30, x3d + ; mov x3d, x3a + ; mov x3a, x37 + ; mov x37, x34 + ; mov x34, x31 + ; mov x31, x3e + ; mov x3e, x3b + ; mov x3b, x38 + ; mov x38, x35 + ; mov x35, x32 + ; mov x32, x3f + ; mov x3f, x3c + ; mov x3c, x39 + ; mov x39, x36 + ; mov x36, x33 + ; mov x33, x3j + ; an intentionally arrangement of registers to facilitate movw + ; x30 r8 + 
; x3d r10 + ; x3a r12 + ; x37 r14 + ; x34 r16 + ; x31 r18 + ; x3e r20 + ; x3b r22 + ; x38 r9 + ; x35 r11 + ; x32 r13 + ; x3f r15 + ; x3c r17 + ; x39 r19 + ; x36 r21 + ; x33 r23 + movw t0j, x30 ; t1j:t0j <= x38:x30 + movw x30, x3d ; x38:x30 <= x35:x3d + movw x3d, x3a ; x35:x3d <= x32:x3a + movw x3a, x37 ; x32:x3a <= x3f:x37 + movw x37, x34 ; x3f:x37 <= x3c:x34 + movw x34, x31 ; x3c:x34 <= x39:x31 + movw x31, x3e ; x39:x31 <= x36:x3e + movw x3e, x3b ; x36:x3e <= x33:x3b + mov x3b, t1j ; x3b <= x38 + mov x33, t0j ; x33 <= x30 + + pop rcnt + dec rcnt + push rcnt + breq round_loop_end + rjmp round_loop_start + +round_loop_end: + pop rcnt + + ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES) + std Y + 0x00, x20 + std Y + 0x02, x22 + std Y + 0x04, x24 + std Y + 0x06, x26 + std Y + 0x08, x28 + std Y + 0x0a, x2a + std Y + 0x0c, x2c + std Y + 0x0e, x2e + adiw YL, ROW_INBYTES + st Y+, x30 + st Y+, x31 + st Y+, x32 + st Y+, x33 + st Y+, x34 + st Y+, x35 + st Y+, x36 + st Y+, x37 + st Y+, x38 + st Y+, x39 + st Y+, x3a + st Y+, x3b + st Y+, x3c + st Y+, x3d + st Y+, x3e + st Y+, x3f + + POP_CONFLICT +ret + +.section .text +#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) +RC_LFSR7: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03 +.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a +.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c +.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b +.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a +.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f +.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43 +.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09 +.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36 +.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37 +.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31 +.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25 +.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c +.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a +.byte 0x15, 0x2a, 
0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f +.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00 +RC_LFSR8: +.byte 0x01, 0x02, 0x04, 0x08, 0x11, 0x23, 0x47, 0x8e +.byte 0x1c, 0x38, 0x71, 0xe2, 0xc4, 0x89, 0x12, 0x25 +.byte 0x4b, 0x97, 0x2e, 0x5c, 0xb8, 0x70, 0xe0, 0xc0 +.byte 0x81, 0x03, 0x06, 0x0c, 0x19, 0x32, 0x64, 0xc9 +.byte 0x92, 0x24, 0x49, 0x93, 0x26, 0x4d, 0x9b, 0x37 +.byte 0x6e, 0xdc, 0xb9, 0x72, 0xe4, 0xc8, 0x90, 0x20 +.byte 0x41, 0x82, 0x05, 0x0a, 0x15, 0x2b, 0x56, 0xad +.byte 0x5b, 0xb6, 0x6d, 0xda, 0xb5, 0x6b, 0xd6, 0xac +.byte 0x59, 0xb2, 0x65, 0xcb, 0x96, 0x2c, 0x58, 0xb0 +.byte 0x61, 0xc3, 0x87, 0x0f, 0x1f, 0x3e, 0x7d, 0xfb +.byte 0xf6, 0xed, 0xdb, 0xb7, 0x6f, 0xde, 0xbd, 0x7a +.byte 0xf5, 0xeb, 0xd7, 0xae, 0x5d, 0xba, 0x74, 0xe8 +.byte 0xd1, 0xa2, 0x44, 0x88, 0x10, 0x21, 0x43, 0x86 +.byte 0x0d, 0x1b, 0x36, 0x6c, 0xd8, 0xb1, 0x63, 0xc7 +.byte 0x8f, 0x1e, 0x3c, 0x79, 0xf3, 0xe7, 0xce, 0x9c +.byte 0x39, 0x73, 0xe6, 0xcc, 0x98, 0x31, 0x62, 0xc5 +.byte 0x8b, 0x16, 0x2d, 0x5a, 0xb4, 0x69, 0xd2, 0xa4 +.byte 0x48, 0x91, 0x22, 0x45, 0x8a, 0x14, 0x29, 0x52 +.byte 0xa5, 0x4a, 0x95, 0x2a, 0x54, 0xa9, 0x53, 0xa7 +.byte 0x4e, 0x9d, 0x3b, 0x77, 0xee, 0xdd, 0xbb, 0x76 +.byte 0xec, 0xd9, 0xb3, 0x67, 0xcf, 0x9e, 0x3d, 0x7b +.byte 0xf7, 0xef, 0xdf, 0xbf, 0x7e, 0xfd, 0xfa, 0xf4 +.byte 0xe9, 0xd3, 0xa6, 0x4c, 0x99, 0x33, 0x66, 0xcd +.byte 0x9a, 0x35, 0x6a, 0xd4, 0xa8, 0x51, 0xa3, 0x46 +.byte 0x8c, 0x18, 0x30, 0x60, 0xc1, 0x83, 0x07, 0x0e +.byte 0x1d, 0x3a, 0x75, 0xea, 0xd5, 0xaa, 0x55, 0xab +.byte 0x57, 0xaf, 0x5f, 0xbe, 0x7c, 0xf9, 0xf2, 0xe5 +.byte 0xca, 0x94, 0x28, 0x50, 0xa1, 0x42, 0x84, 0x09 +.byte 0x13, 0x27, 0x4f, 0x9f, 0x3f, 0x7f, 0xff, 0xfe +.byte 0xfc, 0xf8, 0xf0, 0xe1, 0xc2, 0x85, 0x0b, 0x17 +.byte 0x2f, 0x5e, 0xbc, 0x78, 0xf1, 0xe3, 0xc6, 0x8d +.byte 0x1a, 0x34, 0x68, 0xd0, 0xa0, 0x40, 0x80, 0x00 +#elif defined(CRYPTO_AEAD) +RC_LFSR7: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03 +.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a +.byte 0x14, 0x28, 0x51, 
0x23, 0x47, 0x0f, 0x1e, 0x3c +.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b +.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a +.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f +.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43 +.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09 +.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36 +.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37 +.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31 +.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25 +.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c +.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a +.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f +.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00 +#else +RC_LFSR8: +.byte 0x01, 0x02, 0x04, 0x08, 0x11, 0x23, 0x47, 0x8e +.byte 0x1c, 0x38, 0x71, 0xe2, 0xc4, 0x89, 0x12, 0x25 +.byte 0x4b, 0x97, 0x2e, 0x5c, 0xb8, 0x70, 0xe0, 0xc0 +.byte 0x81, 0x03, 0x06, 0x0c, 0x19, 0x32, 0x64, 0xc9 +.byte 0x92, 0x24, 0x49, 0x93, 0x26, 0x4d, 0x9b, 0x37 +.byte 0x6e, 0xdc, 0xb9, 0x72, 0xe4, 0xc8, 0x90, 0x20 +.byte 0x41, 0x82, 0x05, 0x0a, 0x15, 0x2b, 0x56, 0xad +.byte 0x5b, 0xb6, 0x6d, 0xda, 0xb5, 0x6b, 0xd6, 0xac +.byte 0x59, 0xb2, 0x65, 0xcb, 0x96, 0x2c, 0x58, 0xb0 +.byte 0x61, 0xc3, 0x87, 0x0f, 0x1f, 0x3e, 0x7d, 0xfb +.byte 0xf6, 0xed, 0xdb, 0xb7, 0x6f, 0xde, 0xbd, 0x7a +.byte 0xf5, 0xeb, 0xd7, 0xae, 0x5d, 0xba, 0x74, 0xe8 +.byte 0xd1, 0xa2, 0x44, 0x88, 0x10, 0x21, 0x43, 0x86 +.byte 0x0d, 0x1b, 0x36, 0x6c, 0xd8, 0xb1, 0x63, 0xc7 +.byte 0x8f, 0x1e, 0x3c, 0x79, 0xf3, 0xe7, 0xce, 0x9c +.byte 0x39, 0x73, 0xe6, 0xcc, 0x98, 0x31, 0x62, 0xc5 +.byte 0x8b, 0x16, 0x2d, 0x5a, 0xb4, 0x69, 0xd2, 0xa4 +.byte 0x48, 0x91, 0x22, 0x45, 0x8a, 0x14, 0x29, 0x52 +.byte 0xa5, 0x4a, 0x95, 0x2a, 0x54, 0xa9, 0x53, 0xa7 +.byte 0x4e, 0x9d, 0x3b, 0x77, 0xee, 0xdd, 0xbb, 0x76 +.byte 0xec, 0xd9, 0xb3, 0x67, 0xcf, 0x9e, 0x3d, 0x7b +.byte 0xf7, 0xef, 0xdf, 0xbf, 0x7e, 0xfd, 0xfa, 0xf4 +.byte 0xe9, 0xd3, 0xa6, 0x4c, 0x99, 0x33, 0x66, 0xcd +.byte 
0x9a, 0x35, 0x6a, 0xd4, 0xa8, 0x51, 0xa3, 0x46 +.byte 0x8c, 0x18, 0x30, 0x60, 0xc1, 0x83, 0x07, 0x0e +.byte 0x1d, 0x3a, 0x75, 0xea, 0xd5, 0xaa, 0x55, 0xab +.byte 0x57, 0xaf, 0x5f, 0xbe, 0x7c, 0xf9, 0xf2, 0xe5 +.byte 0xca, 0x94, 0x28, 0x50, 0xa1, 0x42, 0x84, 0x09 +.byte 0x13, 0x27, 0x4f, 0x9f, 0x3f, 0x7f, 0xff, 0xfe +.byte 0xfc, 0xf8, 0xf0, 0xe1, 0xc2, 0x85, 0x0b, 0x17 +.byte 0x2f, 0x5e, 0xbc, 0x78, 0xf1, 0xe3, 0xc6, 0x8d +.byte 0x1a, 0x34, 0x68, 0xd0, 0xa0, 0x40, 0x80, 0x00 +#endif \ No newline at end of file diff --git a/knot/Implementations/crypto_aead/knot128v1/avr8_speed/permutation.h b/knot/Implementations/crypto_aead/knot128v1/avr8_speed/permutation.h new file mode 100644 index 0000000..e6c9793 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v1/avr8_speed/permutation.h @@ -0,0 +1,45 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.1 2020 by KNOT Team * +; ********************************************** +; + +; +; ============================================ +; R E G I S T E R D E F I N I T I O N S +; ============================================ +; + +#define mclen r16 +#define radlen r17 +#define tcnt r17 +#define tmp0 r20 +#define tmp1 r21 +#define cnt0 r22 +#define rn r23 +#define rate r24 + + +; AEDH = 0b000: for authenticate AD +; AEDH = 0b001: for encryption +; AEDH = 0b011: for decryption +; AEDH = 0b100: for hash +#define AEDH r25 +#define rcnt r26 + +#if (STATE_INBITS==256) +#include "knot256.h" +#elif (STATE_INBITS==384) +#include "knot384.h" +#elif (STATE_INBITS==512) +#include "knot512.h" +#else +#error "Not specified key size and state size" +#endif + + diff --git a/knot/Implementations/crypto_aead/knot128v2/avr8_speed/api.h b/knot/Implementations/crypto_aead/knot128v2/avr8_speed/api.h new file mode 100644 index 0000000..51fc844 --- /dev/null +++ 
b/knot/Implementations/crypto_aead/knot128v2/avr8_speed/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 \ No newline at end of file diff --git a/knot/Implementations/crypto_aead/knot128v2/avr8_speed/assist.h b/knot/Implementations/crypto_aead/knot128v2/avr8_speed/assist.h new file mode 100644 index 0000000..f95a717 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/avr8_speed/assist.h @@ -0,0 +1,86 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.1 2020 by KNOT Team * +; ********************************************** +; +.macro PUSH_CONFLICT + push r16 + push r17 + push r18 + push r19 + + push r23 + push r24 + push r25 + push r26 + push r27 + push r28 + push r29 + push r30 + push r31 +.endm + +.macro POP_CONFLICT + pop r31 + pop r30 + pop r29 + pop r28 + pop r27 + pop r26 + pop r25 + pop r24 + pop r23 + + pop r19 + pop r18 + pop r17 + pop r16 +.endm + +.macro PUSH_ALL + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r28 + push r29 +.endm + +.macro POP_ALL + pop r29 + pop r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + clr r1 +.endm \ No newline at end of file diff --git a/knot/Implementations/crypto_aead/knot128v2/avr8_speed/config.h b/knot/Implementations/crypto_aead/knot128v2/avr8_speed/config.h new file mode 100644 index 0000000..98114a9 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/avr8_speed/config.h @@ -0,0 +1,131 @@ +#ifndef __CONFIG_H__ +#define __CONFIG_H__ + +#define CRYPTO_AEAD +//#define 
CRYPTO_HASH + +#define MAX_MESSAGE_LENGTH 128 + +#define STATE_INBITS 384 +/* For CRYPTO_AEAD */ +#define CRYPTO_KEYBITS 128 +/* For CRYPTO_HASH */ +#define CRYPTO_BITS 256 + +#define STATE_INBYTES ((STATE_INBITS + 7) / 8) +#define ROW_INBITS ((STATE_INBITS + 3) / 4) +#define ROW_INBYTES ((ROW_INBITS + 7) / 8) + +/* For CRYPTO_AEAD */ +#define CRYPTO_KEYBYTES ((CRYPTO_KEYBITS + 7) / 8) +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES CRYPTO_KEYBYTES +#define CRYPTO_ABYTES CRYPTO_KEYBYTES +#define CRYPTO_NOOVERLAP 1 + +#define MAX_ASSOCIATED_DATA_LENGTH 32 +#define MAX_CIPHER_LENGTH (MAX_MESSAGE_LENGTH + CRYPTO_ABYTES) + +#define TAG_MATCH 0 +#define TAG_UNMATCH -1 +#define OTHER_FAILURES -2 + +/* For CRYPTO_HASH */ +#define CRYPTO_BYTES ((CRYPTO_BITS + 7) / 8) + + + +#define DOMAIN_BITS 0x80 +#define PAD_BITS 0x01 +#define S384_R192_BITS 0x80 + +#if (STATE_INBITS==256) +#define C1 1 +#define C2 8 +#define C3 25 +#elif (STATE_INBITS==384) +#define C1 1 +#define C2 8 +#define C3 55 +#elif (STATE_INBITS==512) +#define C1 1 +#define C2 16 +#define C3 25 +#else +#error "Not specified state size" +#endif + +#ifdef CRYPTO_AEAD +/* For CRYPTO_AEAD */ +#define KEY_INBITS (CRYPTO_KEYBYTES * 8) +#define KEY_INBYTES (CRYPTO_KEYBYTES) + +#define NONCE_INBITS (CRYPTO_NPUBBYTES * 8) +#define NONCE_INBYTES (CRYPTO_NPUBBYTES) + +#define TAG_INBITS (CRYPTO_ABYTES * 8) +#define TAG_INBYTES (CRYPTO_ABYTES) + +#if (KEY_INBITS==128) && (STATE_INBITS==256) +#define RATE_INBITS 64 +#define NR_0 52 +#define NR_i 28 +#define NR_f 32 +#elif (KEY_INBITS==128) && (STATE_INBITS==384) +#define RATE_INBITS 192 +#define NR_0 76 +#define NR_i 28 +#define NR_f 32 +#elif (KEY_INBITS==192) && (STATE_INBITS==384) +#define RATE_INBITS 96 +#define NR_0 76 +#define NR_i 40 +#define NR_f 44 +#elif (KEY_INBITS==256) && (STATE_INBITS==512) +#define RATE_INBITS 128 +#define NR_0 100 +#define NR_i 52 +#define NR_f 56 +#else +#error "Not specified key size and state size" +#endif + +#define RATE_INBYTES 
((RATE_INBITS + 7) / 8) +#define SQUEEZE_RATE_INBYTES TAG_INBYTES + +#endif + +#ifdef CRYPTO_HASH +/* For CRYPTO_HASH */ +#define HASH_DIGEST_INBITS (CRYPTO_BYTES * 8) + +#if (HASH_DIGEST_INBITS==256) && (STATE_INBITS==256) +#define HASH_RATE_INBITS 32 +#define HASH_SQUEEZE_RATE_INBITS 128 +#define NR_h 68 +#elif (HASH_DIGEST_INBITS==256) && (STATE_INBITS==384) +#define HASH_RATE_INBITS 128 +#define HASH_SQUEEZE_RATE_INBITS 128 +#define NR_h 80 +#elif (HASH_DIGEST_INBITS==384) && (STATE_INBITS==384) +#define HASH_RATE_INBITS 48 +#define HASH_SQUEEZE_RATE_INBITS 192 +#define NR_h 104 +#elif (HASH_DIGEST_INBITS==512) && (STATE_INBITS==512) +#define HASH_RATE_INBITS 64 +#define HASH_SQUEEZE_RATE_INBITS 256 +#define NR_h 140 +#else +#error "Not specified hash digest size and state size" +#endif + +#define HASH_RATE_INBYTES ((HASH_RATE_INBITS + 7) / 8) +#define HASH_SQUEEZE_RATE_INBYTES ((HASH_SQUEEZE_RATE_INBITS + 7) / 8) + +#endif + +#define TAG_MATCH 0 +#define TAG_UNMATCH -1 +#define OTHER_FAILURES -2 + +#endif \ No newline at end of file diff --git a/knot/Implementations/crypto_aead/knot128v2/avr8_speed/crypto_aead.h b/knot/Implementations/crypto_aead/knot128v2/avr8_speed/crypto_aead.h new file mode 100644 index 0000000..cd820d3 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/avr8_speed/crypto_aead.h @@ -0,0 +1,26 @@ +#ifdef __cplusplus +extern "C" { +#endif + +int crypto_aead_encrypt( + unsigned char *c,unsigned long long *clen, + const unsigned char *m,unsigned long long mlen, + const unsigned char *ad,unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k + ); + + +int crypto_aead_decrypt( + unsigned char *m,unsigned long long *outputmlen, + unsigned char *nsec, + const unsigned char *c,unsigned long long clen, + const unsigned char *ad,unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k + ); + +#ifdef __cplusplus +} +#endif diff --git 
a/knot/Implementations/crypto_aead/knot128v2/avr8_speed/encrypt.c b/knot/Implementations/crypto_aead/knot128v2/avr8_speed/encrypt.c new file mode 100644 index 0000000..baf0a3b --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/avr8_speed/encrypt.c @@ -0,0 +1,106 @@ +#include +#include +#include +#include +#include "config.h" + +extern void crypto_aead_encrypt_asm( + unsigned char *c, + const unsigned char *m, + unsigned char mlen, + const unsigned char *ad, + unsigned char adlen, + const unsigned char *npub, + const unsigned char *k + ); + +extern int crypto_aead_decrypt_asm( + unsigned char *m, + const unsigned char *c, + unsigned char clen, + const unsigned char *ad, + unsigned char adlen, + const unsigned char *npub, + const unsigned char *k + ); + +extern void crypto_hash_asm( + unsigned char *out, + const unsigned char *in, + unsigned char inlen + ); + + +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k + ) +{ + /* + ... + ... the code for the cipher implementation goes here, + ... generating a ciphertext c[0],c[1],...,c[*clen-1] + ... from a plaintext m[0],m[1],...,m[mlen-1] + ... and associated data ad[0],ad[1],...,ad[adlen-1] + ... and nonce npub[0],npub[1],.. + ... and secret key k[0],k[1],... + ... the implementation shall not use nsec + ... + ... return 0; + */ + + (void)nsec; + + crypto_aead_encrypt_asm(c, m, mlen, ad, adlen, npub, k); + + *clen = mlen + TAG_INBYTES; + return 0; +} + + + +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k + ) +{ + /* + ... + ... the code for the AEAD implementation goes here, + ... 
generating a plaintext m[0],m[1],...,m[*mlen-1] + ... and secret message number nsec[0],nsec[1],... + ... from a ciphertext c[0],c[1],...,c[clen-1] + ... and associated data ad[0],ad[1],...,ad[adlen-1] + ... and nonce number npub[0],npub[1],... + ... and secret key k[0],k[1],... + ... + ... return 0; + */ + unsigned long long mlen_; + unsigned char tag_is_match; + + (void)nsec; + if (clen < CRYPTO_ABYTES) { + return -1; + } + mlen_ = clen - CRYPTO_ABYTES; + + tag_is_match = crypto_aead_decrypt_asm(m, c, mlen_, ad, adlen, npub, k); + + if (tag_is_match != 0) + { + memset(m, 0, (size_t)mlen_); + return -1; + } + + *mlen = mlen_; + return 0; +} \ No newline at end of file diff --git a/knot/Implementations/crypto_aead/knot128v2/avr8_speed/encrypt_core.S b/knot/Implementations/crypto_aead/knot128v2/avr8_speed/encrypt_core.S new file mode 100644 index 0000000..bd74f93 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/avr8_speed/encrypt_core.S @@ -0,0 +1,555 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.1 2020 by KNOT Team * +; ********************************************** +; + +; +; ============================================ +; S R A M D E F I N I T I O N S +; ============================================ +; +#include +#include "config.h" + +.section .noinit + SRAM_STATE: .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#if (STATE_INBYTES > 32) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif +#if (STATE_INBYTES > 48) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif + SRAM_MESSAGE_OUT_ADDR: .BYTE 0, 0 + SRAM_MESSAGE_IN_ADDR: .BYTE 0, 0 + SRAM_MESSAGE_IN_LEN: .BYTE 0, 0 +#ifdef CRYPTO_AEAD +; For CRYPTO_AEAD + SRAM_ASSOCIATED_DATA_ADDR: .BYTE 0, 0 + 
SRAM_ADLEN: .BYTE 0, 0 + SRAM_NONCE_ADDR: .BYTE 0, 0 + SRAM_KEY_ADDR: .BYTE 0, 0 + + SRAM_ADDITIONAL: + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#if (CRYPTO_ABYTES > 16) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif +#if (CRYPTO_ABYTES > 24) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif + +#endif + +.section .text + +#include "permutation.h" + +; require YH:YL be the address of the current associated data/cipher/message block +; for enc and dec, store ciphertext or plaintext +; require ZH:ZL be the address of the current cipher/message block +.macro XOR_to_State_ENCDEC + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + mov cnt0, rate +XOR_to_State_loop_ENCDEC: + ld tmp0, Y+ ; plaintext/ciphertext + ld tmp1, X ; state + eor tmp1, tmp0 ; ciphertext/plaintext + st Z+, tmp1 ; store ciphertext/plaintext + sbrc AEDH, 1 ; test auth/enc or dec, if AEDH[1] == 0, skip repalce state byte + mov tmp1, tmp0 ; if dec, replace state + st X+, tmp1 ; store state byte + dec cnt0 + brne XOR_to_State_loop_ENCDEC +; YH:YL are now the address of the next associated data block +.endm + +; require YH:YL be the address of the current associated data/cipher/message block +; for enc and dec, store ciphertext or plaintext +; require ZH:ZL be the address of the current cipher/message block +.macro XOR_to_State_AUTH + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + mov cnt0, rate +XOR_to_State_loop_AUTH: + ld tmp0, Y+ ; plaintext/ciphertext + ld tmp1, X ; state + eor tmp1, tmp0 ; ciphertext/plaintext + st X+, tmp1 ; store state byte + dec cnt0 + brne XOR_to_State_loop_AUTH +; YH:YL are now the address of the next associated data block +.endm + + + +; require YH:YL pointed to the input data +; require ZH:ZL pointed to the output data +; require cnt0 containes the nubmer of bytes in source data +; require number of bytes in source data less than rate, i.e., 0 <= cnt0 < rate +; +; the 0th bit in AEDH is used to distinguish (auth AD) or (enc/dec M/C): +; AEDH[0] = 0 for (auth AD), 
AEDH[0] = 1 for (enc/dec M/C) +; the 1th bit in AEDH is used to distinguish (auth AD/enc M) or (dec C): +; AEDH[1] = 0 for (auth AD/enc M), AEDH[1] = 1 for (dec C) +; AEDH = 0b000 for (auth AD) +; AEDH = 0b001 for (enc M) +; AEDH = 0b011 for (dec C) +Pad_XOR_to_State: + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + tst cnt0 + breq XOR_padded_data +XOR_source_data_loop: + ld tmp0, Y+ ; plaintext/ciphertext + ld tmp1, X ; state + eor tmp1, tmp0 ; ciphertext/plaintext + sbrc AEDH, 0 ; test auth or enc/dec, if AEDH[0] == 0, skip store result + st Z+, tmp1 ; store ciphertext/plaintext + sbrc AEDH, 1 ; test auth/enc or dec, if AEDH[1] == 0, skip repalce state byte + mov tmp1, tmp0 ; if dec, replace state + st X+, tmp1 ; store state byte + dec cnt0 + brne XOR_source_data_loop +XOR_padded_data: + ldi tmp0, PAD_BITS + ld tmp1, X + eor tmp1, tmp0 + st X, tmp1 +ret + +AddDomain: + ldi XH, hi8(SRAM_STATE + STATE_INBYTES - 1) + ldi XL, lo8(SRAM_STATE + STATE_INBYTES - 1) + ldi tmp0, DOMAIN_BITS + ld tmp1, X + eor tmp0, tmp1 + st X, tmp0 +ret + +; require ZH:ZL be the address of the destination +EXTRACT_from_State: + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + mov tmp1, rate +EXTRACT_from_State_loop: + ld tmp0, X+ + st Z+, tmp0 + dec tmp1 + brne EXTRACT_from_State_loop +ret + +AUTH: + tst radlen + breq AUTH_end + + cp radlen, rate + brlo auth_ad_padded_block + +auth_ad_loop: + XOR_to_State_AUTH + rcall Permutation + sub radlen, rate + cp radlen, rate + brlo auth_ad_padded_block + rjmp auth_ad_loop + +auth_ad_padded_block: + mov cnt0, radlen + rcall Pad_XOR_to_State + rcall Permutation + +AUTH_end: +ret + +#ifdef CRYPTO_AEAD +Initialization: + ldi rn, NR_0 + ldi XL, lo8(SRAM_STATE) + ldi XH, hi8(SRAM_STATE) + + lds YH, SRAM_NONCE_ADDR + lds YL, SRAM_NONCE_ADDR + 1 + ldi cnt0, CRYPTO_NPUBBYTES +load_nonce_loop: + ld tmp0, Y+ + st X+, tmp0 + dec cnt0 + brne load_nonce_loop + + lds YH, SRAM_KEY_ADDR + lds YL, SRAM_KEY_ADDR + 1 + ldi cnt0, CRYPTO_KEYBYTES 
+load_key_loop: + ld tmp0, Y+ + st X+, tmp0 + dec cnt0 + brne load_key_loop + +#if (STATE_INBITS==384) && (RATE_INBITS==192) + ldi cnt0, (STATE_INBYTES - CRYPTO_NPUBBYTES - CRYPTO_KEYBYTES - 1) + clr tmp0 +empty_state_loop: + st X+, tmp0 + dec cnt0 + brne empty_state_loop + ldi tmp0, S384_R192_BITS + st X+, tmp0 +#endif + + rcall Permutation +ret + +ENC: + tst mclen + breq ENC_end + + cp mclen, rate + brlo enc_padded_block + +enc_loop: + XOR_to_State_ENCDEC + ldi rn, NR_i + rcall Permutation + sub mclen, rate + cp mclen, rate + brlo enc_padded_block + rjmp enc_loop + +enc_padded_block: + mov cnt0, mclen + rcall Pad_XOR_to_State +ENC_end: +ret + +Finalization: + ldi rate, SQUEEZE_RATE_INBYTES + ldi rn, NR_f + rcall Permutation + rcall EXTRACT_from_State +ret + +; void crypto_aead_encrypt_asm( +; unsigned char *c, +; const unsigned char *m, +; unsigned long long mlen, +; const unsigned char *ad, +; unsigned long long adlen, +; const unsigned char *npub, +; const unsigned char *k +; ) +; +; unsigned char *c, is passed in r24:r25 +; const unsigned char *m, is passed in r22:r23 +; unsigned long long mlen, is passed in r20:r21, only LSB (r20) is used +; const unsigned char *ad, is passed in r18:r19 +; unsigned long long adlen, is passed in r16:r17, only LSB (r16) is used +; const unsigned char *npub, is passed in r14:r15 +; const unsigned char *k is passed in r12:r13 +.global crypto_aead_encrypt_asm +crypto_aead_encrypt_asm: + PUSH_ALL + ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR) + ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR) + st X+, r25 ;store cipher address in SRAM_MESSAGE_OUT_ADDR + st X+, r24 + st X+, r23 ;store message address in SRAM_MESSAGE_IN_ADDR + st X+, r22 + st X+, r21 ;store message length in SRAM_MESSAGE_IN_LEN + st X+, r20 + st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR + st X+, r18 + st X+, r17 ;store associated data length in SRAM_ADLEN + st X+, r16 + st X+, r15 ;store nonce address in SRAM_NONCE_ADDR + st X+, r14 + st X+, r13 ;store key address 
in SRAM_KEY_ADDR + st X+, r12 + mov radlen, r16 + mov mclen, r20 + + rcall Initialization + + ldi rn, NR_i + ldi rate, RATE_INBYTES + ldi AEDH, 0b000 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C) + lds YH, SRAM_ASSOCIATED_DATA_ADDR + lds YL, SRAM_ASSOCIATED_DATA_ADDR + 1 + rcall AUTH + rcall AddDomain + ldi AEDH, 0b001 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C) + lds YH, SRAM_MESSAGE_IN_ADDR + lds YL, SRAM_MESSAGE_IN_ADDR + 1 + lds ZH, SRAM_MESSAGE_OUT_ADDR + lds ZL, SRAM_MESSAGE_OUT_ADDR + 1 + rcall ENC + rcall Finalization + POP_ALL +ret + +; int crypto_aead_decrypt_asm( +; unsigned char *m, +; const unsigned char *c, +; unsigned long long clen, +; const unsigned char *ad, +; unsigned long long adlen, +; const unsigned char *npub, +; const unsigned char *k +; ) +; +; unsigned char *m, is passed in r24:r25 +; const unsigned char *c, is passed in r22:r23 +; unsigned long long clen, is passed in r20:r21, only LSB (r20) is used +; const unsigned char *ad, is passed in r18:r19 +; unsigned long long adlen, is passed in r16:r17, only LSB (r16) is used +; const unsigned char *npub, is passed in r14:r15 +; const unsigned char *k is passed in r12:r13 +.global crypto_aead_decrypt_asm +crypto_aead_decrypt_asm: + PUSH_ALL + ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR) + ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR) + st X+, r25 ;store message address in SRAM_MESSAGE_OUT_ADDR + st X+, r24 + st X+, r23 ;store cipher address in SRAM_MESSAGE_IN_ADDR + st X+, r22 + st X+, r21 ;store cipher length in SRAM_MESSAGE_IN_LEN + st X+, r20 + st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR + st X+, r18 + st X+, r17 ;store associated data length in SRAM_ADLEN + st X+, r16 + st X+, r15 ;store nonce address in SRAM_NONCE_ADDR + st X+, r14 + st X+, r13 ;store key address in SRAM_KEY_ADDR + st X+, r12 + mov radlen, r16 + mov mclen, r20 + + rcall Initialization + + ldi rn, NR_i + ldi rate, RATE_INBYTES + ldi AEDH, 0b000 ; 
AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C) + lds YH, SRAM_ASSOCIATED_DATA_ADDR + lds YL, SRAM_ASSOCIATED_DATA_ADDR + 1 + rcall AUTH + rcall AddDomain + ldi AEDH, 0b011 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C) + lds YH, SRAM_MESSAGE_IN_ADDR + lds YL, SRAM_MESSAGE_IN_ADDR + 1 + lds ZH, SRAM_MESSAGE_OUT_ADDR + lds ZL, SRAM_MESSAGE_OUT_ADDR + 1 + rcall ENC + + ldi ZH, hi8(SRAM_ADDITIONAL) + ldi ZL, lo8(SRAM_ADDITIONAL) + rcall Finalization + + sbiw ZL, CRYPTO_ABYTES + ldi cnt0, CRYPTO_ABYTES +compare_tag: + ld tmp0, Z+ + ld tmp1, Y+ + cp tmp0, tmp1 + brne return_tag_not_match + dec cnt0 + brne compare_tag + rjmp return_tag_match + +return_tag_not_match: + ldi r25, 0xFF + ldi r24, 0xFF + rjmp crypto_aead_decrypt_end + +return_tag_match: + clr r25 + clr r24 +crypto_aead_decrypt_end: + POP_ALL +ret + +; #ifdef CRYPTO_AEAD +#endif + + +#ifdef CRYPTO_HASH + +; void crypto_hash_asm( +; unsigned char *out, +; const unsigned char *in, +; unsigned long long inlen +; ) +; +; unsigned char *out, is passed in r24:r25 +; const unsigned char *in, is passed in r22:r23 +; unsigned long long inlen, is passed in r20:r21, only LSB (r20) is used +.global crypto_hash_asm +crypto_hash_asm: + PUSH_ALL + ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR) + ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR) + st X+, r25 ;store message address in SRAM_MESSAGE_OUT_ADDR + st X+, r24 + st X+, r23 ;store cipher address in SRAM_MESSAGE_IN_ADDR + st X+, r22 + st X+, r21 ;store cipher length in SRAM_MESSAGE_IN_LEN + st X+, r20 + mov mclen, r20 + + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) +#if (STATE_INBITS==384) && (HASH_RATE_INBITS==128) + ldi cnt0, STATE_INBYTES - 1 +#else + ldi cnt0, STATE_INBYTES +#endif + clr tmp0 +zero_state: + st X+, tmp0 + dec cnt0 + brne zero_state + +#if (STATE_INBITS==384) && (HASH_RATE_INBITS==128) + ldi tmp0, S384_R192_BITS + st X+, tmp0 +#endif + + ldi rn, NR_h + ldi AEDH, 0b100 + +HASH_ABSORBING: + mov radlen, mclen + 
tst radlen + breq EMPTY_M + ldi rate, HASH_RATE_INBYTES + lds YH, SRAM_MESSAGE_IN_ADDR + lds YL, SRAM_MESSAGE_IN_ADDR + 1 + rcall AUTH + rjmp HASH_SQUEEZING + +EMPTY_M: + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + ldi tmp0, PAD_BITS + ld tmp1, X + eor tmp1, tmp0 + st X, tmp1 + rcall Permutation + +HASH_SQUEEZING: + ldi rate, HASH_SQUEEZE_RATE_INBYTES + lds ZH, SRAM_MESSAGE_OUT_ADDR + lds ZL, SRAM_MESSAGE_OUT_ADDR + 1 + ldi tcnt, CRYPTO_BYTES +SQUEEZING_loop: + rcall EXTRACT_from_State + subi tcnt, HASH_SQUEEZE_RATE_INBYTES + breq HASH_SQUEEZING_end + rcall Permutation + rjmp SQUEEZING_loop +HASH_SQUEEZING_end: + POP_ALL +ret + +#endif + + +; Byte Order In AVR 8: +; KNOT-AEAD(128, 256, 64): +; N[ 0] AEAD_State[ 0] | Message[ 0] Perm_row_0[0] 0 Tag[ 0] +; N[ 1] AEAD_State[ 1] | Message[ 1] Perm_row_0[1] 0 Tag[ 1] +; N[ 2] AEAD_State[ 2] | Message[ 2] Perm_row_0[2] 0 Tag[ 2] +; N[ 3] AEAD_State[ 3] | Message[ 3] Perm_row_0[3] 0 Tag[ 3] +; N[ 4] AEAD_State[ 4] | Message[ 4] 0x01 Perm_row_0[4] 0 Tag[ 4] +; N[ 5] AEAD_State[ 5] | Message[ 5] 0x00 Perm_row_0[5] 0 Tag[ 5] +; N[ 6] AEAD_State[ 6] | Message[ 6] 0x00 Perm_row_0[6] 0 Tag[ 6] +; N[ 7] AEAD_State[ 7] | Message[ 7] 0x00 Perm_row_0[7] <<< 0 Tag[ 7] +; N[ 8] AEAD_State[ 8] | Perm_row_1[0] 1 +; N[ 9] AEAD_State[ 9] | Perm_row_1[1] 1 +; N[10] AEAD_State[10] | Perm_row_1[2] 1 +; N[11] AEAD_State[11] | Perm_row_1[3] 1 +; N[12] AEAD_State[12] | Perm_row_1[4] 1 +; N[13] AEAD_State[13] | Perm_row_1[5] 1 +; N[14] AEAD_State[14] | Perm_row_1[6] 1 +; N[15] AEAD_State[15] | Perm_row_1[7] <<< 1 +; K[ 0] AEAD_State[16] | Perm_row_2[0] 8 +; K[ 1] AEAD_State[17] | Perm_row_2[1] 8 +; K[ 2] AEAD_State[18] | Perm_row_2[2] 8 +; K[ 3] AEAD_State[19] | Perm_row_2[3] 8 +; K[ 4] AEAD_State[20] | Perm_row_2[4] 8 +; K[ 5] AEAD_State[21] | Perm_row_2[5] 8 +; K[ 6] AEAD_State[22] | Perm_row_2[6] 8 +; K[ 7] AEAD_State[23] | Perm_row_2[7] <<< 8 +; K[ 8] AEAD_State[24] | Perm_row_3[0] 25 +; K[ 9] AEAD_State[25] | Perm_row_3[1] 25 +; 
K[10] AEAD_State[26] | Perm_row_3[2] 25
+; K[11] AEAD_State[27] | Perm_row_3[3] 25
+; K[12] AEAD_State[28] | Perm_row_3[4] 25
+; K[13] AEAD_State[29] | Perm_row_3[5] 25
+; K[14] AEAD_State[30] | Perm_row_3[6] 25
+; K[15] AEAD_State[31] | ^0x80 Perm_row_3[7] <<< 25
+;
+;
+; KNOT-AEAD(128, 384, 192):
+; Initialization
+; N[ 0] AEAD_State[ 0] | Message[ 0] Perm_row_0[ 0] 0 Tag[ 0]
+; N[ 1] AEAD_State[ 1] | Message[ 1] Perm_row_0[ 1] 0 Tag[ 1]
+; N[ 2] AEAD_State[ 2] | Message[ 2] Perm_row_0[ 2] 0 Tag[ 2]
+; N[ 3] AEAD_State[ 3] | Message[ 3] Perm_row_0[ 3] 0 Tag[ 3]
+; N[ 4] AEAD_State[ 4] | Message[ 4] 0x01 Perm_row_0[ 4] 0 Tag[ 4]
+; N[ 5] AEAD_State[ 5] | Message[ 5] 0x00 Perm_row_0[ 5] 0 Tag[ 5]
+; N[ 6] AEAD_State[ 6] | Message[ 6] 0x00 Perm_row_0[ 6] 0 Tag[ 6]
+; N[ 7] AEAD_State[ 7] | Message[ 7] 0x00 Perm_row_0[ 7] 0 Tag[ 7]
+; N[ 8] AEAD_State[ 8] | Message[ 8] 0x00 Perm_row_0[ 8] 0 Tag[ 8]
+; N[ 9] AEAD_State[ 9] | Message[ 9] 0x00 Perm_row_0[ 9] 0 Tag[ 9]
+; N[10] AEAD_State[10] | Message[10] 0x00 Perm_row_0[10] 0 Tag[10]
+; N[11] AEAD_State[11] | Message[11] 0x00 Perm_row_0[11] <<< 0 Tag[11]
+; N[12] AEAD_State[12] | Message[12] 0x00 Perm_row_1[ 0] 1 Tag[12]
+; N[13] AEAD_State[13] | Message[13] 0x00 Perm_row_1[ 1] 1 Tag[13]
+; N[14] AEAD_State[14] | Message[14] 0x00 Perm_row_1[ 2] 1 Tag[14]
+; N[15] AEAD_State[15] | Message[15] 0x00 Perm_row_1[ 3] 1 Tag[15]
+; K[ 0] AEAD_State[16] | Message[16] 0x00 Perm_row_1[ 4] 1
+; K[ 1] AEAD_State[17] | Message[17] 0x00 Perm_row_1[ 5] 1
+; K[ 2] AEAD_State[18] | Message[18] 0x00 Perm_row_1[ 6] 1
+; K[ 3] AEAD_State[19] | Message[19] 0x00 Perm_row_1[ 7] 1
+; K[ 4] AEAD_State[20] | Message[20] 0x00 Perm_row_1[ 8] 1
+; K[ 5] AEAD_State[21] | Message[21] 0x00 Perm_row_1[ 9] 1
+; K[ 6] AEAD_State[22] | Message[22] 0x00 Perm_row_1[10] 1
+; K[ 7] AEAD_State[23] | Message[23] 0x00 Perm_row_1[11] <<< 1
+; K[ 8] AEAD_State[24] | Perm_row_2[ 0] 8
+; K[ 9] AEAD_State[25] | Perm_row_2[ 1] 8
+; K[10] AEAD_State[26] | Perm_row_2[ 2] 
8 +; K[11] AEAD_State[27] | Perm_row_2[ 3] 8 +; K[12] AEAD_State[28] | Perm_row_2[ 4] 8 +; K[13] AEAD_State[29] | Perm_row_2[ 5] 8 +; K[14] AEAD_State[30] | Perm_row_2[ 6] 8 +; K[15] AEAD_State[31] | Perm_row_2[ 7] 8 +; 0x00 AEAD_State[32] | Perm_row_2[ 8] 8 +; 0x00 AEAD_State[33] | Perm_row_2[ 9] 8 +; 0x00 AEAD_State[34] | Perm_row_2[10] 8 +; 0x00 AEAD_State[35] | Perm_row_2[11] <<< 8 +; 0x00 AEAD_State[36] | Perm_row_3[ 0] 55 +; 0x00 AEAD_State[37] | Perm_row_3[ 1] 55 +; 0x00 AEAD_State[38] | Perm_row_3[ 2] 55 +; 0x00 AEAD_State[39] | Perm_row_3[ 3] 55 +; 0x00 AEAD_State[40] | Perm_row_3[ 4] 55 +; 0x00 AEAD_State[41] | Perm_row_3[ 5] 55 +; 0x00 AEAD_State[42] | Perm_row_3[ 6] 55 +; 0x00 AEAD_State[43] | Perm_row_3[ 7] 55 +; 0x00 AEAD_State[44] | Perm_row_3[ 8] 55 +; 0x00 AEAD_State[45] | Perm_row_3[ 9] 55 +; 0x00 AEAD_State[46] | Perm_row_3[10] 55 +; 0x00 ^0x80 AEAD_State[47] | ^0x80 Perm_row_3[11] <<< 55 diff --git a/knot/Implementations/crypto_aead/knot128v2/avr8_speed/knot256.h b/knot/Implementations/crypto_aead/knot128v2/avr8_speed/knot256.h new file mode 100644 index 0000000..f99f68b --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/avr8_speed/knot256.h @@ -0,0 +1,306 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.1 2020 by KNOT Team * +; ********************************************** +; +#define x10 r0 +#define x11 r1 +#define x12 r2 +#define x13 r3 +#define x14 r4 +#define x15 r5 +#define x16 r6 +#define x17 r7 + +; an intentionally arrangement of registers to facilitate movw +#define x20 r8 +#define x21 r10 +#define x22 r12 +#define x23 r14 +#define x24 r9 +#define x25 r11 +#define x26 r13 +#define x27 r15 + +; an intentionally arrangement of registers to facilitate movw +#define x30 r16 +#define x35 r18 +#define x32 r20 +#define x37 r22 +#define x34 
r17 +#define x31 r19 +#define x36 r21 +#define x33 r23 + +#define t0j r24 +#define t1j r25 +#define x0j r27 + +#include "assist.h" + +.macro Sbox i0, i1, i2, i3 + mov t0j, \i1 + com \i0 + and \i1, \i0 + eor \i1, \i2 + or \i2, t0j + eor \i0, \i3 + eor \i2, \i0 + eor t0j, \i3 + and \i0, \i1 + eor \i3, \i1 + eor \i0, t0j + and t0j, \i2 + eor \i1, t0j +.endm + +Permutation: + PUSH_CONFLICT + mov rcnt, rn + + ldi YH, hi8(SRAM_STATE + ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + ROW_INBYTES) + ld x10, Y+ + ld x11, Y+ + ld x12, Y+ + ld x13, Y+ + ld x14, Y+ + ld x15, Y+ + ld x16, Y+ + ld x17, Y+ + ld x20, Y+ + ld x21, Y+ + ld x22, Y+ + ld x23, Y+ + ld x24, Y+ + ld x25, Y+ + ld x26, Y+ + ld x27, Y+ + ld x30, Y+ + ld x31, Y+ + ld x32, Y+ + ld x33, Y+ + ld x34, Y+ + ld x35, Y+ + ld x36, Y+ + ld x37, Y+ + +#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) + sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[2] = 1 for HASH + rjmp For_Hash +For_AEAD: + ldi ZL, lo8(RC_LFSR6) + ldi ZH, hi8(RC_LFSR6) + rjmp round_loop_start +For_Hash: + ldi ZL, lo8(RC_LFSR7) + ldi ZH, hi8(RC_LFSR7) +#elif defined(CRYPTO_AEAD) + ldi ZL, lo8(RC_LFSR6) + ldi ZH, hi8(RC_LFSR6) +#else + ldi ZL, lo8(RC_LFSR7) + ldi ZH, hi8(RC_LFSR7) +#endif + +round_loop_start: + ; AddRC + lpm t0j, Z+ + ldi YH, hi8(SRAM_STATE) + ldi YL, lo8(SRAM_STATE) + ld x0j, Y + eor x0j, t0j + + ; SubColumns + Sbox x0j, x10, x20, x30 + st Y+, x0j + ld x0j, Y + Sbox x0j, x11, x21, x31 + st Y+, x0j + ld x0j, Y + Sbox x0j, x12, x22, x32 + st Y+, x0j + ld x0j, Y + Sbox x0j, x13, x23, x33 + st Y+, x0j + ld x0j, Y + Sbox x0j, x14, x24, x34 + st Y+, x0j + ld x0j, Y + Sbox x0j, x15, x25, x35 + st Y+, x0j + ld x0j, Y + Sbox x0j, x16, x26, x36 + st Y+, x0j + ld x0j, Y + Sbox x0j, x17, x27, x37 + st Y, x0j + + ; ShiftRows + ; <<< 1 + mov t0j, x17 + rol t0j + rol x10 + rol x11 + rol x12 + rol x13 + rol x14 + rol x15 + rol x16 + rol x17 + + ; <<< 8 + ; 7 6 5 4 3 2 1 0 => 6 5 4 3 2 1 0 7 + ;mov t0j, x27 + ;mov x27, x26 + ;mov x26, x25 + ;mov x25, x24 + ;mov 
x24, x23 + ;mov x23, x22 + ;mov x22, x21 + ;mov x21, x20 + ;mov x20, t0j + ; an intentionally arrangement of registers to facilitate movw + movw t0j, x23 ; t1j:t0j <= x27:x23 + movw x23, x22 ; x27:x23 <= x26:x22 + movw x22, x21 ; x26:x22 <= x25:x21 + movw x21, x20 ; x25:x21 <= x24:x20 + mov x20, t1j ; x20 <= t1j + mov x24, t0j ; x24 <= t0j + + ; <<< 1 + mov t0j, x37 + rol t0j + rol x30 + rol x31 + rol x32 + rol x33 + rol x34 + rol x35 + rol x36 + rol x37 + ; <<< 24 + ; 7 6 5 4 3 2 1 0 => 4 3 2 1 0 7 6 5 + ;mov t0j, x30 + ;mov x30, x35 + ;mov x35, x32 + ;mov x32, x37 + ;mov x37, x34 + ;mov x34, x31 + ;mov x31, x36 + ;mov x36, x33 + ;mov x33, t0j + ; an intentionally arrangement of registers to facilitate movw + ;x30 r16 + ;x35 r18 + ;x32 r20 + ;x37 r22 + ;x34 r17 + ;x31 r19 + ;x36 r21 + ;x33 r23 + movw t0j, x30 ; t1j:t0j <= x34:x30 + movw x30, x35 ; x34:x30 <= x31:x35 + movw x35, x32 ; x31:x35 <= x36:x32 + movw x32, x37 ; x36:x32 <= x33:x37 + mov x37, t1j ; x37 <= x34 + mov x33, t0j ; x33 <= x30 + + dec rcnt + breq round_loop_end + jmp round_loop_start + +round_loop_end: + ldi YH, hi8(SRAM_STATE + ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + ROW_INBYTES) + st Y+, x10 + st Y+, x11 + st Y+, x12 + st Y+, x13 + st Y+, x14 + st Y+, x15 + st Y+, x16 + st Y+, x17 + st Y+, x20 + st Y+, x21 + st Y+, x22 + st Y+, x23 + st Y+, x24 + st Y+, x25 + st Y+, x26 + st Y+, x27 + st Y+, x30 + st Y+, x31 + st Y+, x32 + st Y+, x33 + st Y+, x34 + st Y+, x35 + st Y+, x36 + st Y+, x37 + + POP_CONFLICT +ret + + +.section .text +#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) +RC_LFSR6: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x21, 0x03, 0x06 +.byte 0x0c, 0x18, 0x31, 0x22, 0x05, 0x0a, 0x14, 0x29 +.byte 0x13, 0x27, 0x0f, 0x1e, 0x3d, 0x3a, 0x34, 0x28 +.byte 0x11, 0x23, 0x07, 0x0e, 0x1c, 0x39, 0x32, 0x24 +.byte 0x09, 0x12, 0x25, 0x0b, 0x16, 0x2d, 0x1b, 0x37 +.byte 0x2e, 0x1d, 0x3b, 0x36, 0x2c, 0x19, 0x33, 0x26 +.byte 0x0d, 0x1a, 0x35, 0x2a, 0x15, 0x2b, 0x17, 0x2f +.byte 0x1f, 0x3f, 0x3e, 0x3c, 0x38, 
0x30, 0x20, 0x00 +RC_LFSR7: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03 +.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a +.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c +.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b +.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a +.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f +.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43 +.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09 +.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36 +.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37 +.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31 +.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25 +.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c +.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a +.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f +.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00 +#elif defined(CRYPTO_AEAD) +RC_LFSR6: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x21, 0x03, 0x06 +.byte 0x0c, 0x18, 0x31, 0x22, 0x05, 0x0a, 0x14, 0x29 +.byte 0x13, 0x27, 0x0f, 0x1e, 0x3d, 0x3a, 0x34, 0x28 +.byte 0x11, 0x23, 0x07, 0x0e, 0x1c, 0x39, 0x32, 0x24 +.byte 0x09, 0x12, 0x25, 0x0b, 0x16, 0x2d, 0x1b, 0x37 +.byte 0x2e, 0x1d, 0x3b, 0x36, 0x2c, 0x19, 0x33, 0x26 +.byte 0x0d, 0x1a, 0x35, 0x2a, 0x15, 0x2b, 0x17, 0x2f +.byte 0x1f, 0x3f, 0x3e, 0x3c, 0x38, 0x30, 0x20, 0x00 +#else +RC_LFSR7: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03 +.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a +.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c +.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b +.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a +.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f +.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43 +.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09 +.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36 +.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37 +.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31 +.byte 0x63, 0x46, 0x0d, 
0x1a, 0x34, 0x69, 0x52, 0x25 +.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c +.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a +.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f +.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00 +#endif \ No newline at end of file diff --git a/knot/Implementations/crypto_aead/knot128v2/avr8_speed/knot384.h b/knot/Implementations/crypto_aead/knot128v2/avr8_speed/knot384.h new file mode 100644 index 0000000..0b3dd75 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/avr8_speed/knot384.h @@ -0,0 +1,261 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.1 2020 by KNOT Team * +; ********************************************** +; + +; an intentionally arrangement of registers to facilitate movw +#define x20 r0 +#define x21 r2 +#define x22 r4 +#define x23 r6 +#define x24 r8 +#define x25 r10 +#define x26 r1 +#define x27 r3 +#define x28 r5 +#define x29 r7 +#define x2a r9 +#define x2b r11 + +; an intentionally arrangement of registers to facilitate movw +#define x30 r22 +#define x35 r20 +#define x3a r18 +#define x33 r16 +#define x38 r14 +#define x31 r12 +#define x36 r23 +#define x3b r21 +#define x34 r19 +#define x39 r17 +#define x32 r15 +#define x37 r13 + +#define t0j r24 +#define t1j r25 +#define x0j r25 +#define x1j r27 + +#include "assist.h" + +.macro Sbox i0, i1, i2, i3 + ldi t0j, 0xFF + eor \i0, t0j + mov t0j, \i1 + and \i1, \i0 + eor \i1, \i2 + or \i2, t0j + eor \i0, \i3 + eor \i2, \i0 + eor t0j, \i3 + and \i0, \i1 + eor \i3, \i1 + eor \i0, t0j + and t0j, \i2 + eor \i1, t0j +.endm + +.macro OneColumn i0, i1, i2, i3 + ld \i0, Y + ldd \i1, Y + ROW_INBYTES + Sbox \i0, \i1, \i2, \i3 + st Y+, \i0 + rol \i1 ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES -1, \i1 +.endm + +Permutation: + PUSH_CONFLICT + mov rcnt, rn + + ldi 
YH, hi8(SRAM_STATE + 2 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES) + ld x20, Y+ + ld x21, Y+ + ld x22, Y+ + ld x23, Y+ + ld x24, Y+ + ld x25, Y+ + ld x26, Y+ + ld x27, Y+ + ld x28, Y+ + ld x29, Y+ + ld x2a, Y+ + ld x2b, Y+ + ld x30, Y+ + ld x31, Y+ + ld x32, Y+ + ld x33, Y+ + ld x34, Y+ + ld x35, Y+ + ld x36, Y+ + ld x37, Y+ + ld x38, Y+ + ld x39, Y+ + ld x3a, Y+ + ld x3b, Y+ + + ldi ZL, lo8(RC_LFSR7) + ldi ZH, hi8(RC_LFSR7) + +round_loop_start: + ; AddRC + lpm t0j, Z+ + ldi YH, hi8(SRAM_STATE) + ldi YL, lo8(SRAM_STATE) + ld x0j, Y + eor x0j, t0j + + ldd x1j, Y + ROW_INBYTES + Sbox x0j, x1j, x20, x30 + st Y+, x0j + lsl x1j ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES -1, x1j + + OneColumn x0j, x1j, x21, x31 + OneColumn x0j, x1j, x22, x32 + OneColumn x0j, x1j, x23, x33 + OneColumn x0j, x1j, x24, x34 + OneColumn x0j, x1j, x25, x35 + OneColumn x0j, x1j, x26, x36 + OneColumn x0j, x1j, x27, x37 + OneColumn x0j, x1j, x28, x38 + OneColumn x0j, x1j, x29, x39 + OneColumn x0j, x1j, x2a, x3a + OneColumn x0j, x1j, x2b, x3b + + ld x1j, Y + eor t0j, t0j + adc x1j, t0j + st Y, x1j + + ; b a 9 8 7 6 5 4 3 2 1 0 + ; -- -- -- -- -- -- -- -- -- -- -- x- 0 + ; -- -- -- -- -- -- -- -- -- -- -- x' 0 + ; -- -- -- -- -- -- -- -- -- -- x- -- 1 + ; -- -- -- -- x' -- -- -- -- -- -- -- 7 + ; 4 3 2 1 0 b a 9 8 7 6 5 + + ; ShiftRows -- the last two rows + ; <<< 8 + ; b a 9 8 7 6 5 4 3 2 1 0 => a 9 8 7 6 5 4 3 2 1 0 b + movw t0j, x25 ; t1j:t0j <= x2b:x25 + movw x25, x24 ; x2b:x25 <= x2a:x24 + movw x24, x23 ; x2a:x24 <= x29:x23 + movw x23, x22 ; x29:x23 <= x28:x22 + movw x22, x21 ; x28:x22 <= x27:x21 + movw x21, x20 ; x27:x21 <= x26:x20 + mov x26, t0j ; x26 <= x25 + mov x20, t1j ; x20 <= x2b + + ; >>> 1 + mov t0j, x3b + ror t0j + ror x3a + ror x39 + ror x38 + ror x37 + ror x36 + ror x35 + ror x34 + ror x33 + ror x32 + ror x31 + ror x30 + ror x3b + ; <<< 56 + ; b a 9 8 7 6 5 4 3 2 1 0 => 4 3 2 1 0 b a 9 8 7 6 5 + ; mov x3j, x30 + ; mov x30, x35 + ; mov x35, x3a + ; mov x3a, x33 
+ ; mov x33, x38 + ; mov x38, x31 + ; mov x31, x36 + ; mov x36, x3b + ; mov x3b, x34 + ; mov x34, x39 + ; mov x39, x32 + ; mov x32, x37 + ; mov x37, x3j + ; an intentionally arrangement of registers to facilitate movw + ; x30 r22 + ; x35 r20 + ; x3a r18 + ; x33 r16 + ; x38 r14 + ; x31 r12 + ; x36 r23 + ; x3b r21 + ; x34 r19 + ; x39 r17 + ; x32 r15 + ; x37 r13 + movw t0j, x30 ; t1j:t0j <= x36:x30 + movw x30, x35 ; x36:x30 <= x3b:x35 + movw x35, x3a ; x3b:x35 <= x34:x3a + movw x3a, x33 ; x34:x3a <= x39:x33 + movw x33, x38 ; x39:x33 <= x32:x38 + movw x38, x31 ; x32:x38 <= x37:x31 + mov x31, t1j ; x31 <= x36 + mov x37, t0j ; x37 <= x30 + + dec rcnt + breq round_loop_end + jmp round_loop_start + +round_loop_end: + + ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES) + st Y+, x20 + st Y+, x21 + st Y+, x22 + st Y+, x23 + st Y+, x24 + st Y+, x25 + st Y+, x26 + st Y+, x27 + st Y+, x28 + st Y+, x29 + st Y+, x2a + st Y+, x2b + st Y+, x30 + st Y+, x31 + st Y+, x32 + st Y+, x33 + st Y+, x34 + st Y+, x35 + st Y+, x36 + st Y+, x37 + st Y+, x38 + st Y+, x39 + st Y+, x3a + st Y+, x3b + + POP_CONFLICT +ret + +RC_LFSR7: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03 +.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a +.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c +.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b +.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a +.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f +.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43 +.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09 +.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36 +.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37 +.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31 +.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25 +.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c +.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a +.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f +.byte 0x7f, 0x7e, 0x7c, 0x78, 
0x70, 0x60, 0x40, 0x00 \ No newline at end of file diff --git a/knot/Implementations/crypto_aead/knot128v2/avr8_speed/knot512.h b/knot/Implementations/crypto_aead/knot128v2/avr8_speed/knot512.h new file mode 100644 index 0000000..b0e4319 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/avr8_speed/knot512.h @@ -0,0 +1,435 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.1 2020 by KNOT Team * +; ********************************************** +; +#define x20 r0 +#define x22 r2 +#define x24 r4 +#define x26 r6 +#define x28 r1 +#define x2a r3 +#define x2c r5 +#define x2e r7 + +#define x30 r8 +#define x3d r10 +#define x3a r12 +#define x37 r14 +#define x34 r16 +#define x31 r18 +#define x3e r20 +#define x3b r22 +#define x38 r9 +#define x35 r11 +#define x32 r13 +#define x3f r15 +#define x3c r17 +#define x39 r19 +#define x36 r21 +#define x33 r23 + +#define t0j r24 +#define t1j r25 +#define x0j r25 +#define x1j r27 +#define x2j r26 + +#include "assist.h" + +.macro Sbox i0, i1, i2, i3 + ldi t0j, 0xFF + eor \i0, t0j + mov t0j, \i1 + and \i1, \i0 + eor \i1, \i2 + or \i2, t0j + eor \i0, \i3 + eor \i2, \i0 + eor t0j, \i3 + and \i0, \i1 + eor \i3, \i1 + eor \i0, t0j + and t0j, \i2 + eor \i1, t0j +.endm + +.macro TwoColumns i2_e, i3_e, i3_o + ; column 2i + ld x0j, Y + ldd x1j, Y + ROW_INBYTES + Sbox x0j, x1j, \i2_e, \i3_e + st Y+, x0j + rol x1j ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES - 1, x1j + + ; column 2i+1 + ld x0j, Y + ldd x1j, Y + ROW_INBYTES + Sbox x0j, x1j, x2j, \i3_o + st Y+, x0j + rol x1j ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES - 1, x1j + ldd t0j, Y + 2 * ROW_INBYTES + 1 + std Y + 2 * ROW_INBYTES + 1, x2j + mov x2j, t0j +.endm + +Permutation: + PUSH_CONFLICT + mov rcnt, rn + push rcnt + + ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES) + ldi YL, 
lo8(SRAM_STATE + 2 * ROW_INBYTES) + + ldd x20, Y + 0x00 + ldd x22, Y + 0x02 + ldd x24, Y + 0x04 + ldd x26, Y + 0x06 + ldd x28, Y + 0x08 + ldd x2a, Y + 0x0a + ldd x2c, Y + 0x0c + ldd x2e, Y + 0x0e + + adiw YL, ROW_INBYTES + + ld x30, Y+ + ld x31, Y+ + ld x32, Y+ + ld x33, Y+ + ld x34, Y+ + ld x35, Y+ + ld x36, Y+ + ld x37, Y+ + ld x38, Y+ + ld x39, Y+ + ld x3a, Y+ + ld x3b, Y+ + ld x3c, Y+ + ld x3d, Y+ + ld x3e, Y+ + ld x3f, Y+ + +#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) + sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[2] = 1 for HASH + rjmp For_Hash +For_AEAD: + ldi ZL, lo8(RC_LFSR7) + ldi ZH, hi8(RC_LFSR7) + rjmp round_loop_start +For_Hash: + ldi ZL, lo8(RC_LFSR8) + ldi ZH, hi8(RC_LFSR8) +#elif defined(CRYPTO_AEAD) + ldi ZL, lo8(RC_LFSR7) + ldi ZH, hi8(RC_LFSR7) +#else + ldi ZL, lo8(RC_LFSR8) + ldi ZH, hi8(RC_LFSR8) +#endif + + +round_loop_start: + ; AddRC + lpm t0j, Z+ + ldi YH, hi8(SRAM_STATE) + ldi YL, lo8(SRAM_STATE) + + ; column 0 + ld x0j, Y + eor x0j, t0j + ldd x1j, Y + ROW_INBYTES + Sbox x0j, x1j, x20, x30 + st Y+, x0j + lsl x1j ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES - 1, x1j + + ; column 1 + ld x0j, Y + ldd x1j, Y + ROW_INBYTES + ldd x2j, Y + 2 * ROW_INBYTES + Sbox x0j, x1j, x2j, x31 + st Y+, x0j + rol x1j ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES - 1, x1j + ldd t0j, Y + 2 * ROW_INBYTES + 1 + std Y + 2 * ROW_INBYTES + 1, x2j + mov x2j, t0j + + ; column 2, 3 + TwoColumns x22, x32, x33 + ; column 4, 5 + TwoColumns x24, x34, x35 + ; column 6, 7 + TwoColumns x26, x36, x37 + ; column 8, 9 + TwoColumns x28, x38, x39 + ; column 10, 11 + TwoColumns x2a, x3a, x3b + ; column 12, 13 + TwoColumns x2c, x3c, x3d + + ; column 14 + ld x0j, Y + ldd x1j, Y + ROW_INBYTES + Sbox x0j, x1j, x2e, x3e + st Y+, x0j + rol x1j ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES - 1, x1j + + ; column 15 + ld x0j, Y + ldd x1j, Y + ROW_INBYTES + Sbox x0j, x1j, x2j, x3f + st Y+, x0j + rol x1j ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES - 1, x1j + + ld x1j, Y + 
eor t0j, t0j + adc x1j, t0j + st Y, x1j + std Y + ROW_INBYTES + 1, x2j + + ; f e d c b a 9 8 7 6 5 4 3 2 1 0 + ; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x- 0 + ; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x' 0 + ; -- -- -- -- -- -- -- -- -- -- -- -- -- x- -- -- 2 + ; -- -- -- -- -- -- -- -- -- -- -- -- x' -- -- -- 3 + ; c b a 9 8 7 6 5 4 3 2 1 0 f e d + ; x2e x2c x2a x28 x26 x24 x22 x20 => x2c x2a x28 x26 x24 x22 x20 x2e + ;mov t0j, x2e + ;mov x2e, x2c + ;mov x2c, x2a + ;mov x2a, x28 + ;mov x28, x26 + ;mov x26, x24 + ;mov x24, x22 + ;mov x22, x20 + ;mov x20, t0j + ; an intentionally arrangement of registers to facilitate movw + movw t0j, x26 ; t1j:t0j <= x2e:x26 + movw x26, x24 ; x2e:x26 <= x2c:x24 + movw x24, x22 ; x2c:x24 <= x2a:x22 + movw x22, x20 ; x2a:x22 <= x28:x20 + mov x20, t1j ; x20 <= t1j + mov x28, t0j ; x28 <= t0j + + ; <<< 1 + mov t0j, x3f + rol t0j + rol x30 + rol x31 + rol x32 + rol x33 + rol x34 + rol x35 + rol x36 + rol x37 + rol x38 + rol x39 + rol x3a + rol x3b + rol x3c + rol x3d + rol x3e + rol x3f + ; <<< 24 + ; f e d c b a 9 8 7 6 5 4 3 2 1 0 => + ; c b a 9 8 7 6 5 4 3 2 1 0 f e d + ; mov x3j, x30 + ; mov x30, x3d + ; mov x3d, x3a + ; mov x3a, x37 + ; mov x37, x34 + ; mov x34, x31 + ; mov x31, x3e + ; mov x3e, x3b + ; mov x3b, x38 + ; mov x38, x35 + ; mov x35, x32 + ; mov x32, x3f + ; mov x3f, x3c + ; mov x3c, x39 + ; mov x39, x36 + ; mov x36, x33 + ; mov x33, x3j + ; an intentionally arrangement of registers to facilitate movw + ; x30 r8 + ; x3d r10 + ; x3a r12 + ; x37 r14 + ; x34 r16 + ; x31 r18 + ; x3e r20 + ; x3b r22 + ; x38 r9 + ; x35 r11 + ; x32 r13 + ; x3f r15 + ; x3c r17 + ; x39 r19 + ; x36 r21 + ; x33 r23 + movw t0j, x30 ; t1j:t0j <= x38:x30 + movw x30, x3d ; x38:x30 <= x35:x3d + movw x3d, x3a ; x35:x3d <= x32:x3a + movw x3a, x37 ; x32:x3a <= x3f:x37 + movw x37, x34 ; x3f:x37 <= x3c:x34 + movw x34, x31 ; x3c:x34 <= x39:x31 + movw x31, x3e ; x39:x31 <= x36:x3e + movw x3e, x3b ; x36:x3e <= x33:x3b + mov x3b, t1j ; x3b <= x38 + 
mov x33, t0j ; x33 <= x30 + + pop rcnt + dec rcnt + push rcnt + breq round_loop_end + rjmp round_loop_start + +round_loop_end: + pop rcnt + + ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES) + std Y + 0x00, x20 + std Y + 0x02, x22 + std Y + 0x04, x24 + std Y + 0x06, x26 + std Y + 0x08, x28 + std Y + 0x0a, x2a + std Y + 0x0c, x2c + std Y + 0x0e, x2e + adiw YL, ROW_INBYTES + st Y+, x30 + st Y+, x31 + st Y+, x32 + st Y+, x33 + st Y+, x34 + st Y+, x35 + st Y+, x36 + st Y+, x37 + st Y+, x38 + st Y+, x39 + st Y+, x3a + st Y+, x3b + st Y+, x3c + st Y+, x3d + st Y+, x3e + st Y+, x3f + + POP_CONFLICT +ret + +.section .text +#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) +RC_LFSR7: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03 +.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a +.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c +.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b +.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a +.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f +.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43 +.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09 +.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36 +.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37 +.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31 +.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25 +.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c +.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a +.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f +.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00 +RC_LFSR8: +.byte 0x01, 0x02, 0x04, 0x08, 0x11, 0x23, 0x47, 0x8e +.byte 0x1c, 0x38, 0x71, 0xe2, 0xc4, 0x89, 0x12, 0x25 +.byte 0x4b, 0x97, 0x2e, 0x5c, 0xb8, 0x70, 0xe0, 0xc0 +.byte 0x81, 0x03, 0x06, 0x0c, 0x19, 0x32, 0x64, 0xc9 +.byte 0x92, 0x24, 0x49, 0x93, 0x26, 0x4d, 0x9b, 0x37 +.byte 0x6e, 0xdc, 0xb9, 0x72, 0xe4, 0xc8, 0x90, 0x20 +.byte 0x41, 0x82, 0x05, 0x0a, 0x15, 0x2b, 0x56, 0xad +.byte 0x5b, 0xb6, 0x6d, 
0xda, 0xb5, 0x6b, 0xd6, 0xac +.byte 0x59, 0xb2, 0x65, 0xcb, 0x96, 0x2c, 0x58, 0xb0 +.byte 0x61, 0xc3, 0x87, 0x0f, 0x1f, 0x3e, 0x7d, 0xfb +.byte 0xf6, 0xed, 0xdb, 0xb7, 0x6f, 0xde, 0xbd, 0x7a +.byte 0xf5, 0xeb, 0xd7, 0xae, 0x5d, 0xba, 0x74, 0xe8 +.byte 0xd1, 0xa2, 0x44, 0x88, 0x10, 0x21, 0x43, 0x86 +.byte 0x0d, 0x1b, 0x36, 0x6c, 0xd8, 0xb1, 0x63, 0xc7 +.byte 0x8f, 0x1e, 0x3c, 0x79, 0xf3, 0xe7, 0xce, 0x9c +.byte 0x39, 0x73, 0xe6, 0xcc, 0x98, 0x31, 0x62, 0xc5 +.byte 0x8b, 0x16, 0x2d, 0x5a, 0xb4, 0x69, 0xd2, 0xa4 +.byte 0x48, 0x91, 0x22, 0x45, 0x8a, 0x14, 0x29, 0x52 +.byte 0xa5, 0x4a, 0x95, 0x2a, 0x54, 0xa9, 0x53, 0xa7 +.byte 0x4e, 0x9d, 0x3b, 0x77, 0xee, 0xdd, 0xbb, 0x76 +.byte 0xec, 0xd9, 0xb3, 0x67, 0xcf, 0x9e, 0x3d, 0x7b +.byte 0xf7, 0xef, 0xdf, 0xbf, 0x7e, 0xfd, 0xfa, 0xf4 +.byte 0xe9, 0xd3, 0xa6, 0x4c, 0x99, 0x33, 0x66, 0xcd +.byte 0x9a, 0x35, 0x6a, 0xd4, 0xa8, 0x51, 0xa3, 0x46 +.byte 0x8c, 0x18, 0x30, 0x60, 0xc1, 0x83, 0x07, 0x0e +.byte 0x1d, 0x3a, 0x75, 0xea, 0xd5, 0xaa, 0x55, 0xab +.byte 0x57, 0xaf, 0x5f, 0xbe, 0x7c, 0xf9, 0xf2, 0xe5 +.byte 0xca, 0x94, 0x28, 0x50, 0xa1, 0x42, 0x84, 0x09 +.byte 0x13, 0x27, 0x4f, 0x9f, 0x3f, 0x7f, 0xff, 0xfe +.byte 0xfc, 0xf8, 0xf0, 0xe1, 0xc2, 0x85, 0x0b, 0x17 +.byte 0x2f, 0x5e, 0xbc, 0x78, 0xf1, 0xe3, 0xc6, 0x8d +.byte 0x1a, 0x34, 0x68, 0xd0, 0xa0, 0x40, 0x80, 0x00 +#elif defined(CRYPTO_AEAD) +RC_LFSR7: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03 +.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a +.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c +.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b +.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a +.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f +.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43 +.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09 +.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36 +.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37 +.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31 +.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 
0x69, 0x52, 0x25 +.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c +.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a +.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f +.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00 +#else +RC_LFSR8: +.byte 0x01, 0x02, 0x04, 0x08, 0x11, 0x23, 0x47, 0x8e +.byte 0x1c, 0x38, 0x71, 0xe2, 0xc4, 0x89, 0x12, 0x25 +.byte 0x4b, 0x97, 0x2e, 0x5c, 0xb8, 0x70, 0xe0, 0xc0 +.byte 0x81, 0x03, 0x06, 0x0c, 0x19, 0x32, 0x64, 0xc9 +.byte 0x92, 0x24, 0x49, 0x93, 0x26, 0x4d, 0x9b, 0x37 +.byte 0x6e, 0xdc, 0xb9, 0x72, 0xe4, 0xc8, 0x90, 0x20 +.byte 0x41, 0x82, 0x05, 0x0a, 0x15, 0x2b, 0x56, 0xad +.byte 0x5b, 0xb6, 0x6d, 0xda, 0xb5, 0x6b, 0xd6, 0xac +.byte 0x59, 0xb2, 0x65, 0xcb, 0x96, 0x2c, 0x58, 0xb0 +.byte 0x61, 0xc3, 0x87, 0x0f, 0x1f, 0x3e, 0x7d, 0xfb +.byte 0xf6, 0xed, 0xdb, 0xb7, 0x6f, 0xde, 0xbd, 0x7a +.byte 0xf5, 0xeb, 0xd7, 0xae, 0x5d, 0xba, 0x74, 0xe8 +.byte 0xd1, 0xa2, 0x44, 0x88, 0x10, 0x21, 0x43, 0x86 +.byte 0x0d, 0x1b, 0x36, 0x6c, 0xd8, 0xb1, 0x63, 0xc7 +.byte 0x8f, 0x1e, 0x3c, 0x79, 0xf3, 0xe7, 0xce, 0x9c +.byte 0x39, 0x73, 0xe6, 0xcc, 0x98, 0x31, 0x62, 0xc5 +.byte 0x8b, 0x16, 0x2d, 0x5a, 0xb4, 0x69, 0xd2, 0xa4 +.byte 0x48, 0x91, 0x22, 0x45, 0x8a, 0x14, 0x29, 0x52 +.byte 0xa5, 0x4a, 0x95, 0x2a, 0x54, 0xa9, 0x53, 0xa7 +.byte 0x4e, 0x9d, 0x3b, 0x77, 0xee, 0xdd, 0xbb, 0x76 +.byte 0xec, 0xd9, 0xb3, 0x67, 0xcf, 0x9e, 0x3d, 0x7b +.byte 0xf7, 0xef, 0xdf, 0xbf, 0x7e, 0xfd, 0xfa, 0xf4 +.byte 0xe9, 0xd3, 0xa6, 0x4c, 0x99, 0x33, 0x66, 0xcd +.byte 0x9a, 0x35, 0x6a, 0xd4, 0xa8, 0x51, 0xa3, 0x46 +.byte 0x8c, 0x18, 0x30, 0x60, 0xc1, 0x83, 0x07, 0x0e +.byte 0x1d, 0x3a, 0x75, 0xea, 0xd5, 0xaa, 0x55, 0xab +.byte 0x57, 0xaf, 0x5f, 0xbe, 0x7c, 0xf9, 0xf2, 0xe5 +.byte 0xca, 0x94, 0x28, 0x50, 0xa1, 0x42, 0x84, 0x09 +.byte 0x13, 0x27, 0x4f, 0x9f, 0x3f, 0x7f, 0xff, 0xfe +.byte 0xfc, 0xf8, 0xf0, 0xe1, 0xc2, 0x85, 0x0b, 0x17 +.byte 0x2f, 0x5e, 0xbc, 0x78, 0xf1, 0xe3, 0xc6, 0x8d +.byte 0x1a, 0x34, 0x68, 0xd0, 0xa0, 0x40, 0x80, 0x00 +#endif \ No newline 
at end of file diff --git a/knot/Implementations/crypto_aead/knot128v2/avr8_speed/permutation.h b/knot/Implementations/crypto_aead/knot128v2/avr8_speed/permutation.h new file mode 100644 index 0000000..e6c9793 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/avr8_speed/permutation.h @@ -0,0 +1,45 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.1 2020 by KNOT Team * +; ********************************************** +; + +; +; ============================================ +; R E G I S T E R D E F I N I T I O N S +; ============================================ +; + +#define mclen r16 +#define radlen r17 +#define tcnt r17 +#define tmp0 r20 +#define tmp1 r21 +#define cnt0 r22 +#define rn r23 +#define rate r24 + + +; AEDH = 0b000: for authenticate AD +; AEDH = 0b001: for encryption +; AEDH = 0b011: for decryption +; AEDH = 0b100: for hash +#define AEDH r25 +#define rcnt r26 + +#if (STATE_INBITS==256) +#include "knot256.h" +#elif (STATE_INBITS==384) +#include "knot384.h" +#elif (STATE_INBITS==512) +#include "knot512.h" +#else +#error "Not specified key size and state size" +#endif + + diff --git a/knot/Implementations/crypto_aead/knot192/avr8_speed/api.h b/knot/Implementations/crypto_aead/knot192/avr8_speed/api.h new file mode 100644 index 0000000..0146d82 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/avr8_speed/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 24 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 24 +#define CRYPTO_ABYTES 24 +#define CRYPTO_NOOVERLAP 1 diff --git a/knot/Implementations/crypto_aead/knot192/avr8_speed/assist.h b/knot/Implementations/crypto_aead/knot192/avr8_speed/assist.h new file mode 100644 index 0000000..f95a717 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/avr8_speed/assist.h @@ -0,0 +1,86 @@ +; +; 
********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.1 2020 by KNOT Team * +; ********************************************** +; +.macro PUSH_CONFLICT + push r16 + push r17 + push r18 + push r19 + + push r23 + push r24 + push r25 + push r26 + push r27 + push r28 + push r29 + push r30 + push r31 +.endm + +.macro POP_CONFLICT + pop r31 + pop r30 + pop r29 + pop r28 + pop r27 + pop r26 + pop r25 + pop r24 + pop r23 + + pop r19 + pop r18 + pop r17 + pop r16 +.endm + +.macro PUSH_ALL + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r28 + push r29 +.endm + +.macro POP_ALL + pop r29 + pop r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + clr r1 +.endm \ No newline at end of file diff --git a/knot/Implementations/crypto_aead/knot192/avr8_speed/config.h b/knot/Implementations/crypto_aead/knot192/avr8_speed/config.h new file mode 100644 index 0000000..173a40a --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/avr8_speed/config.h @@ -0,0 +1,131 @@ +#ifndef __CONFIG_H__ +#define __CONFIG_H__ + +#define CRYPTO_AEAD +//#define CRYPTO_HASH + +#define MAX_MESSAGE_LENGTH 128 + +#define STATE_INBITS 384 +/* For CRYPTO_AEAD */ +#define CRYPTO_KEYBITS 192 +/* For CRYPTO_HASH */ +#define CRYPTO_BITS 384 + +#define STATE_INBYTES ((STATE_INBITS + 7) / 8) +#define ROW_INBITS ((STATE_INBITS + 3) / 4) +#define ROW_INBYTES ((ROW_INBITS + 7) / 8) + +/* For CRYPTO_AEAD */ +#define CRYPTO_KEYBYTES ((CRYPTO_KEYBITS + 7) / 8) +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES CRYPTO_KEYBYTES +#define CRYPTO_ABYTES CRYPTO_KEYBYTES +#define CRYPTO_NOOVERLAP 1 + +#define 
MAX_ASSOCIATED_DATA_LENGTH 32 +#define MAX_CIPHER_LENGTH (MAX_MESSAGE_LENGTH + CRYPTO_ABYTES) + +#define TAG_MATCH 0 +#define TAG_UNMATCH -1 +#define OTHER_FAILURES -2 + +/* For CRYPTO_HASH */ +#define CRYPTO_BYTES ((CRYPTO_BITS + 7) / 8) + + + +#define DOMAIN_BITS 0x80 +#define PAD_BITS 0x01 +#define S384_R192_BITS 0x80 + +#if (STATE_INBITS==256) +#define C1 1 +#define C2 8 +#define C3 25 +#elif (STATE_INBITS==384) +#define C1 1 +#define C2 8 +#define C3 55 +#elif (STATE_INBITS==512) +#define C1 1 +#define C2 16 +#define C3 25 +#else +#error "Not specified state size" +#endif + +#ifdef CRYPTO_AEAD +/* For CRYPTO_AEAD */ +#define KEY_INBITS (CRYPTO_KEYBYTES * 8) +#define KEY_INBYTES (CRYPTO_KEYBYTES) + +#define NONCE_INBITS (CRYPTO_NPUBBYTES * 8) +#define NONCE_INBYTES (CRYPTO_NPUBBYTES) + +#define TAG_INBITS (CRYPTO_ABYTES * 8) +#define TAG_INBYTES (CRYPTO_ABYTES) + +#if (KEY_INBITS==128) && (STATE_INBITS==256) +#define RATE_INBITS 64 +#define NR_0 52 +#define NR_i 28 +#define NR_f 32 +#elif (KEY_INBITS==128) && (STATE_INBITS==384) +#define RATE_INBITS 192 +#define NR_0 76 +#define NR_i 28 +#define NR_f 32 +#elif (KEY_INBITS==192) && (STATE_INBITS==384) +#define RATE_INBITS 96 +#define NR_0 76 +#define NR_i 40 +#define NR_f 44 +#elif (KEY_INBITS==256) && (STATE_INBITS==512) +#define RATE_INBITS 128 +#define NR_0 100 +#define NR_i 52 +#define NR_f 56 +#else +#error "Not specified key size and state size" +#endif + +#define RATE_INBYTES ((RATE_INBITS + 7) / 8) +#define SQUEEZE_RATE_INBYTES TAG_INBYTES + +#endif + +#ifdef CRYPTO_HASH +/* For CRYPTO_HASH */ +#define HASH_DIGEST_INBITS (CRYPTO_BYTES * 8) + +#if (HASH_DIGEST_INBITS==256) && (STATE_INBITS==256) +#define HASH_RATE_INBITS 32 +#define HASH_SQUEEZE_RATE_INBITS 128 +#define NR_h 68 +#elif (HASH_DIGEST_INBITS==256) && (STATE_INBITS==384) +#define HASH_RATE_INBITS 128 +#define HASH_SQUEEZE_RATE_INBITS 128 +#define NR_h 80 +#elif (HASH_DIGEST_INBITS==384) && (STATE_INBITS==384) +#define HASH_RATE_INBITS 48 
+#define HASH_SQUEEZE_RATE_INBITS 192 +#define NR_h 104 +#elif (HASH_DIGEST_INBITS==512) && (STATE_INBITS==512) +#define HASH_RATE_INBITS 64 +#define HASH_SQUEEZE_RATE_INBITS 256 +#define NR_h 140 +#else +#error "Not specified hash digest size and state size" +#endif + +#define HASH_RATE_INBYTES ((HASH_RATE_INBITS + 7) / 8) +#define HASH_SQUEEZE_RATE_INBYTES ((HASH_SQUEEZE_RATE_INBITS + 7) / 8) + +#endif + +#define TAG_MATCH 0 +#define TAG_UNMATCH -1 +#define OTHER_FAILURES -2 + +#endif \ No newline at end of file diff --git a/knot/Implementations/crypto_aead/knot192/avr8_speed/crypto_aead.h b/knot/Implementations/crypto_aead/knot192/avr8_speed/crypto_aead.h new file mode 100644 index 0000000..cd820d3 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/avr8_speed/crypto_aead.h @@ -0,0 +1,26 @@ +#ifdef __cplusplus +extern "C" { +#endif + +int crypto_aead_encrypt( + unsigned char *c,unsigned long long *clen, + const unsigned char *m,unsigned long long mlen, + const unsigned char *ad,unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k + ); + + +int crypto_aead_decrypt( + unsigned char *m,unsigned long long *outputmlen, + unsigned char *nsec, + const unsigned char *c,unsigned long long clen, + const unsigned char *ad,unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k + ); + +#ifdef __cplusplus +} +#endif diff --git a/knot/Implementations/crypto_aead/knot192/avr8_speed/encrypt.c b/knot/Implementations/crypto_aead/knot192/avr8_speed/encrypt.c new file mode 100644 index 0000000..baf0a3b --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/avr8_speed/encrypt.c @@ -0,0 +1,106 @@ +#include +#include +#include +#include +#include "config.h" + +extern void crypto_aead_encrypt_asm( + unsigned char *c, + const unsigned char *m, + unsigned char mlen, + const unsigned char *ad, + unsigned char adlen, + const unsigned char *npub, + const unsigned char *k + ); + +extern int 
crypto_aead_decrypt_asm( + unsigned char *m, + const unsigned char *c, + unsigned char clen, + const unsigned char *ad, + unsigned char adlen, + const unsigned char *npub, + const unsigned char *k + ); + +extern void crypto_hash_asm( + unsigned char *out, + const unsigned char *in, + unsigned char inlen + ); + + +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k + ) +{ + /* + ... + ... the code for the cipher implementation goes here, + ... generating a ciphertext c[0],c[1],...,c[*clen-1] + ... from a plaintext m[0],m[1],...,m[mlen-1] + ... and associated data ad[0],ad[1],...,ad[adlen-1] + ... and nonce npub[0],npub[1],.. + ... and secret key k[0],k[1],... + ... the implementation shall not use nsec + ... + ... return 0; + */ + + (void)nsec; + + crypto_aead_encrypt_asm(c, m, mlen, ad, adlen, npub, k); + + *clen = mlen + TAG_INBYTES; + return 0; +} + + + +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k + ) +{ + /* + ... + ... the code for the AEAD implementation goes here, + ... generating a plaintext m[0],m[1],...,m[*mlen-1] + ... and secret message number nsec[0],nsec[1],... + ... from a ciphertext c[0],c[1],...,c[clen-1] + ... and associated data ad[0],ad[1],...,ad[adlen-1] + ... and nonce number npub[0],npub[1],... + ... and secret key k[0],k[1],... + ... + ... 
return 0; + */ + unsigned long long mlen_; + unsigned char tag_is_match; + + (void)nsec; + if (clen < CRYPTO_ABYTES) { + return -1; + } + mlen_ = clen - CRYPTO_ABYTES; + + tag_is_match = crypto_aead_decrypt_asm(m, c, mlen_, ad, adlen, npub, k); + + if (tag_is_match != 0) + { + memset(m, 0, (size_t)mlen_); + return -1; + } + + *mlen = mlen_; + return 0; +} \ No newline at end of file diff --git a/knot/Implementations/crypto_aead/knot192/avr8_speed/encrypt_core.S b/knot/Implementations/crypto_aead/knot192/avr8_speed/encrypt_core.S new file mode 100644 index 0000000..bd74f93 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/avr8_speed/encrypt_core.S @@ -0,0 +1,555 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.1 2020 by KNOT Team * +; ********************************************** +; + +; +; ============================================ +; S R A M D E F I N I T I O N S +; ============================================ +; +#include +#include "config.h" + +.section .noinit + SRAM_STATE: .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#if (STATE_INBYTES > 32) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif +#if (STATE_INBYTES > 48) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif + SRAM_MESSAGE_OUT_ADDR: .BYTE 0, 0 + SRAM_MESSAGE_IN_ADDR: .BYTE 0, 0 + SRAM_MESSAGE_IN_LEN: .BYTE 0, 0 +#ifdef CRYPTO_AEAD +; For CRYPTO_AEAD + SRAM_ASSOCIATED_DATA_ADDR: .BYTE 0, 0 + SRAM_ADLEN: .BYTE 0, 0 + SRAM_NONCE_ADDR: .BYTE 0, 0 + SRAM_KEY_ADDR: .BYTE 0, 0 + + SRAM_ADDITIONAL: + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#if (CRYPTO_ABYTES > 16) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif +#if (CRYPTO_ABYTES > 24) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif + 
+#endif + +.section .text + +#include "permutation.h" + +; require YH:YL be the address of the current associated data/cipher/message block +; for enc and dec, store ciphertext or plaintext +; require ZH:ZL be the address of the current cipher/message block +.macro XOR_to_State_ENCDEC + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + mov cnt0, rate +XOR_to_State_loop_ENCDEC: + ld tmp0, Y+ ; plaintext/ciphertext + ld tmp1, X ; state + eor tmp1, tmp0 ; ciphertext/plaintext + st Z+, tmp1 ; store ciphertext/plaintext + sbrc AEDH, 1 ; test auth/enc or dec, if AEDH[1] == 0, skip repalce state byte + mov tmp1, tmp0 ; if dec, replace state + st X+, tmp1 ; store state byte + dec cnt0 + brne XOR_to_State_loop_ENCDEC +; YH:YL are now the address of the next associated data block +.endm + +; require YH:YL be the address of the current associated data/cipher/message block +; for enc and dec, store ciphertext or plaintext +; require ZH:ZL be the address of the current cipher/message block +.macro XOR_to_State_AUTH + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + mov cnt0, rate +XOR_to_State_loop_AUTH: + ld tmp0, Y+ ; plaintext/ciphertext + ld tmp1, X ; state + eor tmp1, tmp0 ; ciphertext/plaintext + st X+, tmp1 ; store state byte + dec cnt0 + brne XOR_to_State_loop_AUTH +; YH:YL are now the address of the next associated data block +.endm + + + +; require YH:YL pointed to the input data +; require ZH:ZL pointed to the output data +; require cnt0 containes the nubmer of bytes in source data +; require number of bytes in source data less than rate, i.e., 0 <= cnt0 < rate +; +; the 0th bit in AEDH is used to distinguish (auth AD) or (enc/dec M/C): +; AEDH[0] = 0 for (auth AD), AEDH[0] = 1 for (enc/dec M/C) +; the 1th bit in AEDH is used to distinguish (auth AD/enc M) or (dec C): +; AEDH[1] = 0 for (auth AD/enc M), AEDH[1] = 1 for (dec C) +; AEDH = 0b000 for (auth AD) +; AEDH = 0b001 for (enc M) +; AEDH = 0b011 for (dec C) +Pad_XOR_to_State: + ldi XH, hi8(SRAM_STATE) + ldi 
XL, lo8(SRAM_STATE) + tst cnt0 + breq XOR_padded_data +XOR_source_data_loop: + ld tmp0, Y+ ; plaintext/ciphertext + ld tmp1, X ; state + eor tmp1, tmp0 ; ciphertext/plaintext + sbrc AEDH, 0 ; test auth or enc/dec, if AEDH[0] == 0, skip store result + st Z+, tmp1 ; store ciphertext/plaintext + sbrc AEDH, 1 ; test auth/enc or dec, if AEDH[1] == 0, skip repalce state byte + mov tmp1, tmp0 ; if dec, replace state + st X+, tmp1 ; store state byte + dec cnt0 + brne XOR_source_data_loop +XOR_padded_data: + ldi tmp0, PAD_BITS + ld tmp1, X + eor tmp1, tmp0 + st X, tmp1 +ret + +AddDomain: + ldi XH, hi8(SRAM_STATE + STATE_INBYTES - 1) + ldi XL, lo8(SRAM_STATE + STATE_INBYTES - 1) + ldi tmp0, DOMAIN_BITS + ld tmp1, X + eor tmp0, tmp1 + st X, tmp0 +ret + +; require ZH:ZL be the address of the destination +EXTRACT_from_State: + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + mov tmp1, rate +EXTRACT_from_State_loop: + ld tmp0, X+ + st Z+, tmp0 + dec tmp1 + brne EXTRACT_from_State_loop +ret + +AUTH: + tst radlen + breq AUTH_end + + cp radlen, rate + brlo auth_ad_padded_block + +auth_ad_loop: + XOR_to_State_AUTH + rcall Permutation + sub radlen, rate + cp radlen, rate + brlo auth_ad_padded_block + rjmp auth_ad_loop + +auth_ad_padded_block: + mov cnt0, radlen + rcall Pad_XOR_to_State + rcall Permutation + +AUTH_end: +ret + +#ifdef CRYPTO_AEAD +Initialization: + ldi rn, NR_0 + ldi XL, lo8(SRAM_STATE) + ldi XH, hi8(SRAM_STATE) + + lds YH, SRAM_NONCE_ADDR + lds YL, SRAM_NONCE_ADDR + 1 + ldi cnt0, CRYPTO_NPUBBYTES +load_nonce_loop: + ld tmp0, Y+ + st X+, tmp0 + dec cnt0 + brne load_nonce_loop + + lds YH, SRAM_KEY_ADDR + lds YL, SRAM_KEY_ADDR + 1 + ldi cnt0, CRYPTO_KEYBYTES +load_key_loop: + ld tmp0, Y+ + st X+, tmp0 + dec cnt0 + brne load_key_loop + +#if (STATE_INBITS==384) && (RATE_INBITS==192) + ldi cnt0, (STATE_INBYTES - CRYPTO_NPUBBYTES - CRYPTO_KEYBYTES - 1) + clr tmp0 +empty_state_loop: + st X+, tmp0 + dec cnt0 + brne empty_state_loop + ldi tmp0, S384_R192_BITS + st X+, tmp0 
+#endif + + rcall Permutation +ret + +ENC: + tst mclen + breq ENC_end + + cp mclen, rate + brlo enc_padded_block + +enc_loop: + XOR_to_State_ENCDEC + ldi rn, NR_i + rcall Permutation + sub mclen, rate + cp mclen, rate + brlo enc_padded_block + rjmp enc_loop + +enc_padded_block: + mov cnt0, mclen + rcall Pad_XOR_to_State +ENC_end: +ret + +Finalization: + ldi rate, SQUEEZE_RATE_INBYTES + ldi rn, NR_f + rcall Permutation + rcall EXTRACT_from_State +ret + +; void crypto_aead_encrypt_asm( +; unsigned char *c, +; const unsigned char *m, +; unsigned long long mlen, +; const unsigned char *ad, +; unsigned long long adlen, +; const unsigned char *npub, +; const unsigned char *k +; ) +; +; unsigned char *c, is passed in r24:r25 +; const unsigned char *m, is passed in r22:r23 +; unsigned long long mlen, is passed in r20:r21, only LSB (r20) is used +; const unsigned char *ad, is passed in r18:r19 +; unsigned long long adlen, is passed in r16:r17, only LSB (r16) is used +; const unsigned char *npub, is passed in r14:r15 +; const unsigned char *k is passed in r12:r13 +.global crypto_aead_encrypt_asm +crypto_aead_encrypt_asm: + PUSH_ALL + ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR) + ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR) + st X+, r25 ;store cipher address in SRAM_MESSAGE_OUT_ADDR + st X+, r24 + st X+, r23 ;store message address in SRAM_MESSAGE_IN_ADDR + st X+, r22 + st X+, r21 ;store message length in SRAM_MESSAGE_IN_LEN + st X+, r20 + st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR + st X+, r18 + st X+, r17 ;store associated data length in SRAM_ADLEN + st X+, r16 + st X+, r15 ;store nonce address in SRAM_NONCE_ADDR + st X+, r14 + st X+, r13 ;store key address in SRAM_KEY_ADDR + st X+, r12 + mov radlen, r16 + mov mclen, r20 + + rcall Initialization + + ldi rn, NR_i + ldi rate, RATE_INBYTES + ldi AEDH, 0b000 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C) + lds YH, SRAM_ASSOCIATED_DATA_ADDR + lds YL, SRAM_ASSOCIATED_DATA_ADDR + 1 + rcall 
AUTH + rcall AddDomain + ldi AEDH, 0b001 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C) + lds YH, SRAM_MESSAGE_IN_ADDR + lds YL, SRAM_MESSAGE_IN_ADDR + 1 + lds ZH, SRAM_MESSAGE_OUT_ADDR + lds ZL, SRAM_MESSAGE_OUT_ADDR + 1 + rcall ENC + rcall Finalization + POP_ALL +ret + +; int crypto_aead_decrypt_asm( +; unsigned char *m, +; const unsigned char *c, +; unsigned long long clen, +; const unsigned char *ad, +; unsigned long long adlen, +; const unsigned char *npub, +; const unsigned char *k +; ) +; +; unsigned char *m, is passed in r24:r25 +; const unsigned char *c, is passed in r22:r23 +; unsigned long long clen, is passed in r20:r21, only LSB (r20) is used +; const unsigned char *ad, is passed in r18:r19 +; unsigned long long adlen, is passed in r16:r17, only LSB (r16) is used +; const unsigned char *npub, is passed in r14:r15 +; const unsigned char *k is passed in r12:r13 +.global crypto_aead_decrypt_asm +crypto_aead_decrypt_asm: + PUSH_ALL + ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR) + ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR) + st X+, r25 ;store message address in SRAM_MESSAGE_OUT_ADDR + st X+, r24 + st X+, r23 ;store cipher address in SRAM_MESSAGE_IN_ADDR + st X+, r22 + st X+, r21 ;store cipher length in SRAM_MESSAGE_IN_LEN + st X+, r20 + st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR + st X+, r18 + st X+, r17 ;store associated data length in SRAM_ADLEN + st X+, r16 + st X+, r15 ;store nonce address in SRAM_NONCE_ADDR + st X+, r14 + st X+, r13 ;store key address in SRAM_KEY_ADDR + st X+, r12 + mov radlen, r16 + mov mclen, r20 + + rcall Initialization + + ldi rn, NR_i + ldi rate, RATE_INBYTES + ldi AEDH, 0b000 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C) + lds YH, SRAM_ASSOCIATED_DATA_ADDR + lds YL, SRAM_ASSOCIATED_DATA_ADDR + 1 + rcall AUTH + rcall AddDomain + ldi AEDH, 0b011 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C) + lds YH, 
SRAM_MESSAGE_IN_ADDR + lds YL, SRAM_MESSAGE_IN_ADDR + 1 + lds ZH, SRAM_MESSAGE_OUT_ADDR + lds ZL, SRAM_MESSAGE_OUT_ADDR + 1 + rcall ENC + + ldi ZH, hi8(SRAM_ADDITIONAL) + ldi ZL, lo8(SRAM_ADDITIONAL) + rcall Finalization + + sbiw ZL, CRYPTO_ABYTES + ldi cnt0, CRYPTO_ABYTES +compare_tag: + ld tmp0, Z+ + ld tmp1, Y+ + cp tmp0, tmp1 + brne return_tag_not_match + dec cnt0 + brne compare_tag + rjmp return_tag_match + +return_tag_not_match: + ldi r25, 0xFF + ldi r24, 0xFF + rjmp crypto_aead_decrypt_end + +return_tag_match: + clr r25 + clr r24 +crypto_aead_decrypt_end: + POP_ALL +ret + +; #ifdef CRYPTO_AEAD +#endif + + +#ifdef CRYPTO_HASH + +; void crypto_hash_asm( +; unsigned char *out, +; const unsigned char *in, +; unsigned long long inlen +; ) +; +; unsigned char *out, is passed in r24:r25 +; const unsigned char *in, is passed in r22:r23 +; unsigned long long inlen, is passed in r20:r21, only LSB (r20) is used +.global crypto_hash_asm +crypto_hash_asm: + PUSH_ALL + ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR) + ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR) + st X+, r25 ;store message address in SRAM_MESSAGE_OUT_ADDR + st X+, r24 + st X+, r23 ;store cipher address in SRAM_MESSAGE_IN_ADDR + st X+, r22 + st X+, r21 ;store cipher length in SRAM_MESSAGE_IN_LEN + st X+, r20 + mov mclen, r20 + + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) +#if (STATE_INBITS==384) && (HASH_RATE_INBITS==128) + ldi cnt0, STATE_INBYTES - 1 +#else + ldi cnt0, STATE_INBYTES +#endif + clr tmp0 +zero_state: + st X+, tmp0 + dec cnt0 + brne zero_state + +#if (STATE_INBITS==384) && (HASH_RATE_INBITS==128) + ldi tmp0, S384_R192_BITS + st X+, tmp0 +#endif + + ldi rn, NR_h + ldi AEDH, 0b100 + +HASH_ABSORBING: + mov radlen, mclen + tst radlen + breq EMPTY_M + ldi rate, HASH_RATE_INBYTES + lds YH, SRAM_MESSAGE_IN_ADDR + lds YL, SRAM_MESSAGE_IN_ADDR + 1 + rcall AUTH + rjmp HASH_SQUEEZING + +EMPTY_M: + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + ldi tmp0, PAD_BITS + ld tmp1, X + eor tmp1, tmp0 + st X, tmp1 + rcall 
Permutation + +HASH_SQUEEZING: + ldi rate, HASH_SQUEEZE_RATE_INBYTES + lds ZH, SRAM_MESSAGE_OUT_ADDR + lds ZL, SRAM_MESSAGE_OUT_ADDR + 1 + ldi tcnt, CRYPTO_BYTES +SQUEEZING_loop: + rcall EXTRACT_from_State + subi tcnt, HASH_SQUEEZE_RATE_INBYTES + breq HASH_SQUEEZING_end + rcall Permutation + rjmp SQUEEZING_loop +HASH_SQUEEZING_end: + POP_ALL +ret + +#endif + + +; Byte Order In AVR 8: +; KNOT-AEAD(128, 256, 64): +; N[ 0] AEAD_State[ 0] | Message[ 0] Perm_row_0[0] 0 Tag[ 0] +; N[ 1] AEAD_State[ 1] | Message[ 1] Perm_row_0[1] 0 Tag[ 1] +; N[ 2] AEAD_State[ 2] | Message[ 2] Perm_row_0[2] 0 Tag[ 2] +; N[ 3] AEAD_State[ 3] | Message[ 3] Perm_row_0[3] 0 Tag[ 3] +; N[ 4] AEAD_State[ 4] | Message[ 4] 0x01 Perm_row_0[4] 0 Tag[ 4] +; N[ 5] AEAD_State[ 5] | Message[ 5] 0x00 Perm_row_0[5] 0 Tag[ 5] +; N[ 6] AEAD_State[ 6] | Message[ 6] 0x00 Perm_row_0[6] 0 Tag[ 6] +; N[ 7] AEAD_State[ 7] | Message[ 7] 0x00 Perm_row_0[7] <<< 0 Tag[ 7] +; N[ 8] AEAD_State[ 8] | Perm_row_1[0] 1 +; N[ 9] AEAD_State[ 9] | Perm_row_1[1] 1 +; N[10] AEAD_State[10] | Perm_row_1[2] 1 +; N[11] AEAD_State[11] | Perm_row_1[3] 1 +; N[12] AEAD_State[12] | Perm_row_1[4] 1 +; N[13] AEAD_State[13] | Perm_row_1[5] 1 +; N[14] AEAD_State[14] | Perm_row_1[6] 1 +; N[15] AEAD_State[15] | Perm_row_1[7] <<< 1 +; K[ 0] AEAD_State[16] | Perm_row_2[0] 8 +; K[ 1] AEAD_State[17] | Perm_row_2[1] 8 +; K[ 2] AEAD_State[18] | Perm_row_2[2] 8 +; K[ 3] AEAD_State[19] | Perm_row_2[3] 8 +; K[ 4] AEAD_State[20] | Perm_row_2[4] 8 +; K[ 5] AEAD_State[21] | Perm_row_2[5] 8 +; K[ 6] AEAD_State[22] | Perm_row_2[6] 8 +; K[ 7] AEAD_State[23] | Perm_row_2[7] <<< 8 +; K[ 8] AEAD_State[24] | Perm_row_3[0] 25 +; K[ 9] AEAD_State[25] | Perm_row_3[1] 25 +; K[10] AEAD_State[26] | Perm_row_3[2] 25 +; K[11] AEAD_State[27] | Perm_row_3[3] 25 +; K[12] AEAD_State[28] | Perm_row_3[4] 25 +; K[13] AEAD_State[29] | Perm_row_3[5] 25 +; K[14] AEAD_State[30] | Perm_row_3[6] 25 +; K[15] AEAD_State[31] | ^0x80 Perm_row_3[7] <<< 25 +; +; +; KNOT-AEAD(128, 384, 
192): +; Initalization +; N[ 0] AEAD_State[ 0] | Message[ 0] Perm_row_0[ 0] 0 Tag[ 0] +; N[ 1] AEAD_State[ 1] | Message[ 1] Perm_row_0[ 1] 0 Tag[ 1] +; N[ 2] AEAD_State[ 2] | Message[ 2] Perm_row_0[ 2] 0 Tag[ 2] +; N[ 3] AEAD_State[ 3] | Message[ 3] Perm_row_0[ 3] 0 Tag[ 3] +; N[ 4] AEAD_State[ 4] | Message[ 4] 0x01 Perm_row_0[ 4] 0 Tag[ 4] +; N[ 5] AEAD_State[ 5] | Message[ 5] 0x00 Perm_row_0[ 5] 0 Tag[ 5] +; N[ 6] AEAD_State[ 6] | Message[ 6] 0x00 Perm_row_0[ 6] 0 Tag[ 6] +; N[ 7] AEAD_State[ 7] | Message[ 7] 0x00 Perm_row_0[ 7] 0 Tag[ 7] +; N[ 8] AEAD_State[ 8] | Message[ 8] 0x00 Perm_row_0[ 8] 0 Tag[ 8] +; N[ 9] AEAD_State[ 9] | Message[ 9] 0x00 Perm_row_0[ 9] 0 Tag[ 9] +; N[10] AEAD_State[10] | Message[10] 0x00 Perm_row_0[10] 0 Tag[10] +; N[11] AEAD_State[11] | Message[11] 0x00 Perm_row_0[11] <<< 0 Tag[11] +; N[12] AEAD_State[12] | Message[12] 0x00 Perm_row_1[ 0] 1 Tag[12] +; N[13] AEAD_State[13] | Message[13] 0x00 Perm_row_1[ 1] 1 Tag[13] +; N[14] AEAD_State[14] | Message[14] 0x00 Perm_row_1[ 2] 1 Tag[14] +; N[15] AEAD_State[15] | Message[15] 0x00 Perm_row_1[ 3] 1 Tag[15] +; K[ 0] AEAD_State[16] | Message[16] 0x00 Perm_row_1[ 4] 1 +; K[ 1] AEAD_State[17] | Message[17] 0x00 Perm_row_1[ 5] 1 +; K[ 2] AEAD_State[18] | Message[18] 0x00 Perm_row_1[ 6] 1 +; K[ 3] AEAD_State[19] | Message[19] 0x00 Perm_row_1[ 7] 1 +; K[ 4] AEAD_State[20] | Message[20] 0x00 Perm_row_1[ 8] 1 +; K[ 5] AEAD_State[21] | Message[21] 0x00 Perm_row_1[ 9] 1 +; K[ 6] AEAD_State[22] | Message[22] 0x00 Perm_row_1[10] 1 +; K[ 7] AEAD_State[23] | Message[23] 0x00 Perm_row_1[11] <<< 1 +; K[ 8] AEAD_State[24] | Perm_row_2[ 0] 8 +; K[ 9] AEAD_State[25] | Perm_row_2[ 1] 8 +; K[10] AEAD_State[26] | Perm_row_2[ 2] 8 +; K[11] AEAD_State[27] | Perm_row_2[ 3] 8 +; K[12] AEAD_State[28] | Perm_row_2[ 4] 8 +; K[13] AEAD_State[29] | Perm_row_2[ 5] 8 +; K[14] AEAD_State[30] | Perm_row_2[ 6] 8 +; K[15] AEAD_State[31] | Perm_row_2[ 7] 8 +; 0x00 AEAD_State[32] | Perm_row_2[ 8] 8 +; 0x00 AEAD_State[33] | 
Perm_row_2[ 9] 8 +; 0x00 AEAD_State[34] | Perm_row_2[10] 8 +; 0x00 AEAD_State[35] | Perm_row_2[11] <<< 8 +; 0x00 AEAD_State[36] | Perm_row_3[ 0] 55 +; 0x00 AEAD_State[37] | Perm_row_3[ 1] 55 +; 0x00 AEAD_State[38] | Perm_row_3[ 2] 55 +; 0x00 AEAD_State[39] | Perm_row_3[ 3] 55 +; 0x00 AEAD_State[40] | Perm_row_3[ 4] 55 +; 0x00 AEAD_State[41] | Perm_row_3[ 5] 55 +; 0x00 AEAD_State[42] | Perm_row_3[ 6] 55 +; 0x00 AEAD_State[43] | Perm_row_3[ 7] 55 +; 0x00 AEAD_State[44] | Perm_row_3[ 8] 55 +; 0x00 AEAD_State[45] | Perm_row_3[ 9] 55 +; 0x00 AEAD_State[46] | Perm_row_3[10] 55 +; 0x00 ^0x80 AEAD_State[47] | ^0x80 Perm_row_3[11] <<< 55 diff --git a/knot/Implementations/crypto_aead/knot192/avr8_speed/knot256.h b/knot/Implementations/crypto_aead/knot192/avr8_speed/knot256.h new file mode 100644 index 0000000..f99f68b --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/avr8_speed/knot256.h @@ -0,0 +1,306 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.1 2020 by KNOT Team * +; ********************************************** +; +#define x10 r0 +#define x11 r1 +#define x12 r2 +#define x13 r3 +#define x14 r4 +#define x15 r5 +#define x16 r6 +#define x17 r7 + +; an intentionally arrangement of registers to facilitate movw +#define x20 r8 +#define x21 r10 +#define x22 r12 +#define x23 r14 +#define x24 r9 +#define x25 r11 +#define x26 r13 +#define x27 r15 + +; an intentionally arrangement of registers to facilitate movw +#define x30 r16 +#define x35 r18 +#define x32 r20 +#define x37 r22 +#define x34 r17 +#define x31 r19 +#define x36 r21 +#define x33 r23 + +#define t0j r24 +#define t1j r25 +#define x0j r27 + +#include "assist.h" + +.macro Sbox i0, i1, i2, i3 + mov t0j, \i1 + com \i0 + and \i1, \i0 + eor \i1, \i2 + or \i2, t0j + eor \i0, \i3 + eor \i2, \i0 + eor t0j, \i3 + and \i0, \i1 
+ eor \i3, \i1 + eor \i0, t0j + and t0j, \i2 + eor \i1, t0j +.endm + +Permutation: + PUSH_CONFLICT + mov rcnt, rn + + ldi YH, hi8(SRAM_STATE + ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + ROW_INBYTES) + ld x10, Y+ + ld x11, Y+ + ld x12, Y+ + ld x13, Y+ + ld x14, Y+ + ld x15, Y+ + ld x16, Y+ + ld x17, Y+ + ld x20, Y+ + ld x21, Y+ + ld x22, Y+ + ld x23, Y+ + ld x24, Y+ + ld x25, Y+ + ld x26, Y+ + ld x27, Y+ + ld x30, Y+ + ld x31, Y+ + ld x32, Y+ + ld x33, Y+ + ld x34, Y+ + ld x35, Y+ + ld x36, Y+ + ld x37, Y+ + +#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) + sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[2] = 1 for HASH + rjmp For_Hash +For_AEAD: + ldi ZL, lo8(RC_LFSR6) + ldi ZH, hi8(RC_LFSR6) + rjmp round_loop_start +For_Hash: + ldi ZL, lo8(RC_LFSR7) + ldi ZH, hi8(RC_LFSR7) +#elif defined(CRYPTO_AEAD) + ldi ZL, lo8(RC_LFSR6) + ldi ZH, hi8(RC_LFSR6) +#else + ldi ZL, lo8(RC_LFSR7) + ldi ZH, hi8(RC_LFSR7) +#endif + +round_loop_start: + ; AddRC + lpm t0j, Z+ + ldi YH, hi8(SRAM_STATE) + ldi YL, lo8(SRAM_STATE) + ld x0j, Y + eor x0j, t0j + + ; SubColumns + Sbox x0j, x10, x20, x30 + st Y+, x0j + ld x0j, Y + Sbox x0j, x11, x21, x31 + st Y+, x0j + ld x0j, Y + Sbox x0j, x12, x22, x32 + st Y+, x0j + ld x0j, Y + Sbox x0j, x13, x23, x33 + st Y+, x0j + ld x0j, Y + Sbox x0j, x14, x24, x34 + st Y+, x0j + ld x0j, Y + Sbox x0j, x15, x25, x35 + st Y+, x0j + ld x0j, Y + Sbox x0j, x16, x26, x36 + st Y+, x0j + ld x0j, Y + Sbox x0j, x17, x27, x37 + st Y, x0j + + ; ShiftRows + ; <<< 1 + mov t0j, x17 + rol t0j + rol x10 + rol x11 + rol x12 + rol x13 + rol x14 + rol x15 + rol x16 + rol x17 + + ; <<< 8 + ; 7 6 5 4 3 2 1 0 => 6 5 4 3 2 1 0 7 + ;mov t0j, x27 + ;mov x27, x26 + ;mov x26, x25 + ;mov x25, x24 + ;mov x24, x23 + ;mov x23, x22 + ;mov x22, x21 + ;mov x21, x20 + ;mov x20, t0j + ; an intentionally arrangement of registers to facilitate movw + movw t0j, x23 ; t1j:t0j <= x27:x23 + movw x23, x22 ; x27:x23 <= x26:x22 + movw x22, x21 ; x26:x22 <= x25:x21 + movw x21, x20 ; x25:x21 <= x24:x20 + 
mov x20, t1j ; x20 <= t1j + mov x24, t0j ; x24 <= t0j + + ; <<< 1 + mov t0j, x37 + rol t0j + rol x30 + rol x31 + rol x32 + rol x33 + rol x34 + rol x35 + rol x36 + rol x37 + ; <<< 24 + ; 7 6 5 4 3 2 1 0 => 4 3 2 1 0 7 6 5 + ;mov t0j, x30 + ;mov x30, x35 + ;mov x35, x32 + ;mov x32, x37 + ;mov x37, x34 + ;mov x34, x31 + ;mov x31, x36 + ;mov x36, x33 + ;mov x33, t0j + ; an intentionally arrangement of registers to facilitate movw + ;x30 r16 + ;x35 r18 + ;x32 r20 + ;x37 r22 + ;x34 r17 + ;x31 r19 + ;x36 r21 + ;x33 r23 + movw t0j, x30 ; t1j:t0j <= x34:x30 + movw x30, x35 ; x34:x30 <= x31:x35 + movw x35, x32 ; x31:x35 <= x36:x32 + movw x32, x37 ; x36:x32 <= x33:x37 + mov x37, t1j ; x37 <= x34 + mov x33, t0j ; x33 <= x30 + + dec rcnt + breq round_loop_end + jmp round_loop_start + +round_loop_end: + ldi YH, hi8(SRAM_STATE + ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + ROW_INBYTES) + st Y+, x10 + st Y+, x11 + st Y+, x12 + st Y+, x13 + st Y+, x14 + st Y+, x15 + st Y+, x16 + st Y+, x17 + st Y+, x20 + st Y+, x21 + st Y+, x22 + st Y+, x23 + st Y+, x24 + st Y+, x25 + st Y+, x26 + st Y+, x27 + st Y+, x30 + st Y+, x31 + st Y+, x32 + st Y+, x33 + st Y+, x34 + st Y+, x35 + st Y+, x36 + st Y+, x37 + + POP_CONFLICT +ret + + +.section .text +#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) +RC_LFSR6: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x21, 0x03, 0x06 +.byte 0x0c, 0x18, 0x31, 0x22, 0x05, 0x0a, 0x14, 0x29 +.byte 0x13, 0x27, 0x0f, 0x1e, 0x3d, 0x3a, 0x34, 0x28 +.byte 0x11, 0x23, 0x07, 0x0e, 0x1c, 0x39, 0x32, 0x24 +.byte 0x09, 0x12, 0x25, 0x0b, 0x16, 0x2d, 0x1b, 0x37 +.byte 0x2e, 0x1d, 0x3b, 0x36, 0x2c, 0x19, 0x33, 0x26 +.byte 0x0d, 0x1a, 0x35, 0x2a, 0x15, 0x2b, 0x17, 0x2f +.byte 0x1f, 0x3f, 0x3e, 0x3c, 0x38, 0x30, 0x20, 0x00 +RC_LFSR7: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03 +.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a +.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c +.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b +.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 
0x1d, 0x3a +.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f +.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43 +.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09 +.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36 +.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37 +.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31 +.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25 +.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c +.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a +.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f +.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00 +#elif defined(CRYPTO_AEAD) +RC_LFSR6: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x21, 0x03, 0x06 +.byte 0x0c, 0x18, 0x31, 0x22, 0x05, 0x0a, 0x14, 0x29 +.byte 0x13, 0x27, 0x0f, 0x1e, 0x3d, 0x3a, 0x34, 0x28 +.byte 0x11, 0x23, 0x07, 0x0e, 0x1c, 0x39, 0x32, 0x24 +.byte 0x09, 0x12, 0x25, 0x0b, 0x16, 0x2d, 0x1b, 0x37 +.byte 0x2e, 0x1d, 0x3b, 0x36, 0x2c, 0x19, 0x33, 0x26 +.byte 0x0d, 0x1a, 0x35, 0x2a, 0x15, 0x2b, 0x17, 0x2f +.byte 0x1f, 0x3f, 0x3e, 0x3c, 0x38, 0x30, 0x20, 0x00 +#else +RC_LFSR7: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03 +.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a +.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c +.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b +.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a +.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f +.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43 +.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09 +.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36 +.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37 +.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31 +.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25 +.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c +.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a +.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f +.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00 +#endif \ No newline at end of file diff 
--git a/knot/Implementations/crypto_aead/knot192/avr8_speed/knot384.h b/knot/Implementations/crypto_aead/knot192/avr8_speed/knot384.h new file mode 100644 index 0000000..0b3dd75 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/avr8_speed/knot384.h @@ -0,0 +1,261 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.1 2020 by KNOT Team * +; ********************************************** +; + +; an intentionally arrangement of registers to facilitate movw +#define x20 r0 +#define x21 r2 +#define x22 r4 +#define x23 r6 +#define x24 r8 +#define x25 r10 +#define x26 r1 +#define x27 r3 +#define x28 r5 +#define x29 r7 +#define x2a r9 +#define x2b r11 + +; an intentionally arrangement of registers to facilitate movw +#define x30 r22 +#define x35 r20 +#define x3a r18 +#define x33 r16 +#define x38 r14 +#define x31 r12 +#define x36 r23 +#define x3b r21 +#define x34 r19 +#define x39 r17 +#define x32 r15 +#define x37 r13 + +#define t0j r24 +#define t1j r25 +#define x0j r25 +#define x1j r27 + +#include "assist.h" + +.macro Sbox i0, i1, i2, i3 + ldi t0j, 0xFF + eor \i0, t0j + mov t0j, \i1 + and \i1, \i0 + eor \i1, \i2 + or \i2, t0j + eor \i0, \i3 + eor \i2, \i0 + eor t0j, \i3 + and \i0, \i1 + eor \i3, \i1 + eor \i0, t0j + and t0j, \i2 + eor \i1, t0j +.endm + +.macro OneColumn i0, i1, i2, i3 + ld \i0, Y + ldd \i1, Y + ROW_INBYTES + Sbox \i0, \i1, \i2, \i3 + st Y+, \i0 + rol \i1 ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES -1, \i1 +.endm + +Permutation: + PUSH_CONFLICT + mov rcnt, rn + + ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES) + ld x20, Y+ + ld x21, Y+ + ld x22, Y+ + ld x23, Y+ + ld x24, Y+ + ld x25, Y+ + ld x26, Y+ + ld x27, Y+ + ld x28, Y+ + ld x29, Y+ + ld x2a, Y+ + ld x2b, Y+ + ld x30, Y+ + ld x31, Y+ + ld x32, Y+ + ld x33, Y+ + ld 
x34, Y+ + ld x35, Y+ + ld x36, Y+ + ld x37, Y+ + ld x38, Y+ + ld x39, Y+ + ld x3a, Y+ + ld x3b, Y+ + + ldi ZL, lo8(RC_LFSR7) + ldi ZH, hi8(RC_LFSR7) + +round_loop_start: + ; AddRC + lpm t0j, Z+ + ldi YH, hi8(SRAM_STATE) + ldi YL, lo8(SRAM_STATE) + ld x0j, Y + eor x0j, t0j + + ldd x1j, Y + ROW_INBYTES + Sbox x0j, x1j, x20, x30 + st Y+, x0j + lsl x1j ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES -1, x1j + + OneColumn x0j, x1j, x21, x31 + OneColumn x0j, x1j, x22, x32 + OneColumn x0j, x1j, x23, x33 + OneColumn x0j, x1j, x24, x34 + OneColumn x0j, x1j, x25, x35 + OneColumn x0j, x1j, x26, x36 + OneColumn x0j, x1j, x27, x37 + OneColumn x0j, x1j, x28, x38 + OneColumn x0j, x1j, x29, x39 + OneColumn x0j, x1j, x2a, x3a + OneColumn x0j, x1j, x2b, x3b + + ld x1j, Y + eor t0j, t0j + adc x1j, t0j + st Y, x1j + + ; b a 9 8 7 6 5 4 3 2 1 0 + ; -- -- -- -- -- -- -- -- -- -- -- x- 0 + ; -- -- -- -- -- -- -- -- -- -- -- x' 0 + ; -- -- -- -- -- -- -- -- -- -- x- -- 1 + ; -- -- -- -- x' -- -- -- -- -- -- -- 7 + ; 4 3 2 1 0 b a 9 8 7 6 5 + + ; ShiftRows -- the last two rows + ; <<< 8 + ; b a 9 8 7 6 5 4 3 2 1 0 => a 9 8 7 6 5 4 3 2 1 0 b + movw t0j, x25 ; t1j:t0j <= x2b:x25 + movw x25, x24 ; x2b:x25 <= x2a:x24 + movw x24, x23 ; x2a:x24 <= x29:x23 + movw x23, x22 ; x29:x23 <= x28:x22 + movw x22, x21 ; x28:x22 <= x27:x21 + movw x21, x20 ; x27:x21 <= x26:x20 + mov x26, t0j ; x26 <= x25 + mov x20, t1j ; x20 <= x2b + + ; >>> 1 + mov t0j, x3b + ror t0j + ror x3a + ror x39 + ror x38 + ror x37 + ror x36 + ror x35 + ror x34 + ror x33 + ror x32 + ror x31 + ror x30 + ror x3b + ; <<< 56 + ; b a 9 8 7 6 5 4 3 2 1 0 => 4 3 2 1 0 b a 9 8 7 6 5 + ; mov x3j, x30 + ; mov x30, x35 + ; mov x35, x3a + ; mov x3a, x33 + ; mov x33, x38 + ; mov x38, x31 + ; mov x31, x36 + ; mov x36, x3b + ; mov x3b, x34 + ; mov x34, x39 + ; mov x39, x32 + ; mov x32, x37 + ; mov x37, x3j + ; an intentionally arrangement of registers to facilitate movw + ; x30 r22 + ; x35 r20 + ; x3a r18 + ; x33 r16 + ; x38 r14 + ; x31 r12 + ; 
x36 r23 + ; x3b r21 + ; x34 r19 + ; x39 r17 + ; x32 r15 + ; x37 r13 + movw t0j, x30 ; t1j:t0j <= x36:x30 + movw x30, x35 ; x36:x30 <= x3b:x35 + movw x35, x3a ; x3b:x35 <= x34:x3a + movw x3a, x33 ; x34:x3a <= x39:x33 + movw x33, x38 ; x39:x33 <= x32:x38 + movw x38, x31 ; x32:x38 <= x37:x31 + mov x31, t1j ; x31 <= x36 + mov x37, t0j ; x37 <= x30 + + dec rcnt + breq round_loop_end + jmp round_loop_start + +round_loop_end: + + ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES) + st Y+, x20 + st Y+, x21 + st Y+, x22 + st Y+, x23 + st Y+, x24 + st Y+, x25 + st Y+, x26 + st Y+, x27 + st Y+, x28 + st Y+, x29 + st Y+, x2a + st Y+, x2b + st Y+, x30 + st Y+, x31 + st Y+, x32 + st Y+, x33 + st Y+, x34 + st Y+, x35 + st Y+, x36 + st Y+, x37 + st Y+, x38 + st Y+, x39 + st Y+, x3a + st Y+, x3b + + POP_CONFLICT +ret + +RC_LFSR7: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03 +.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a +.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c +.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b +.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a +.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f +.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43 +.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09 +.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36 +.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37 +.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31 +.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25 +.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c +.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a +.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f +.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00 \ No newline at end of file diff --git a/knot/Implementations/crypto_aead/knot192/avr8_speed/knot512.h b/knot/Implementations/crypto_aead/knot192/avr8_speed/knot512.h new file mode 100644 index 0000000..b0e4319 --- /dev/null +++ 
b/knot/Implementations/crypto_aead/knot192/avr8_speed/knot512.h @@ -0,0 +1,435 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.1 2020 by KNOT Team * +; ********************************************** +; +#define x20 r0 +#define x22 r2 +#define x24 r4 +#define x26 r6 +#define x28 r1 +#define x2a r3 +#define x2c r5 +#define x2e r7 + +#define x30 r8 +#define x3d r10 +#define x3a r12 +#define x37 r14 +#define x34 r16 +#define x31 r18 +#define x3e r20 +#define x3b r22 +#define x38 r9 +#define x35 r11 +#define x32 r13 +#define x3f r15 +#define x3c r17 +#define x39 r19 +#define x36 r21 +#define x33 r23 + +#define t0j r24 +#define t1j r25 +#define x0j r25 +#define x1j r27 +#define x2j r26 + +#include "assist.h" + +.macro Sbox i0, i1, i2, i3 + ldi t0j, 0xFF + eor \i0, t0j + mov t0j, \i1 + and \i1, \i0 + eor \i1, \i2 + or \i2, t0j + eor \i0, \i3 + eor \i2, \i0 + eor t0j, \i3 + and \i0, \i1 + eor \i3, \i1 + eor \i0, t0j + and t0j, \i2 + eor \i1, t0j +.endm + +.macro TwoColumns i2_e, i3_e, i3_o + ; column 2i + ld x0j, Y + ldd x1j, Y + ROW_INBYTES + Sbox x0j, x1j, \i2_e, \i3_e + st Y+, x0j + rol x1j ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES - 1, x1j + + ; column 2i+1 + ld x0j, Y + ldd x1j, Y + ROW_INBYTES + Sbox x0j, x1j, x2j, \i3_o + st Y+, x0j + rol x1j ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES - 1, x1j + ldd t0j, Y + 2 * ROW_INBYTES + 1 + std Y + 2 * ROW_INBYTES + 1, x2j + mov x2j, t0j +.endm + +Permutation: + PUSH_CONFLICT + mov rcnt, rn + push rcnt + + ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES) + + ldd x20, Y + 0x00 + ldd x22, Y + 0x02 + ldd x24, Y + 0x04 + ldd x26, Y + 0x06 + ldd x28, Y + 0x08 + ldd x2a, Y + 0x0a + ldd x2c, Y + 0x0c + ldd x2e, Y + 0x0e + + adiw YL, ROW_INBYTES + + ld x30, Y+ + ld x31, Y+ + ld x32, Y+ + ld 
x33, Y+ + ld x34, Y+ + ld x35, Y+ + ld x36, Y+ + ld x37, Y+ + ld x38, Y+ + ld x39, Y+ + ld x3a, Y+ + ld x3b, Y+ + ld x3c, Y+ + ld x3d, Y+ + ld x3e, Y+ + ld x3f, Y+ + +#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) + sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[2] = 1 for HASH + rjmp For_Hash +For_AEAD: + ldi ZL, lo8(RC_LFSR7) + ldi ZH, hi8(RC_LFSR7) + rjmp round_loop_start +For_Hash: + ldi ZL, lo8(RC_LFSR8) + ldi ZH, hi8(RC_LFSR8) +#elif defined(CRYPTO_AEAD) + ldi ZL, lo8(RC_LFSR7) + ldi ZH, hi8(RC_LFSR7) +#else + ldi ZL, lo8(RC_LFSR8) + ldi ZH, hi8(RC_LFSR8) +#endif + + +round_loop_start: + ; AddRC + lpm t0j, Z+ + ldi YH, hi8(SRAM_STATE) + ldi YL, lo8(SRAM_STATE) + + ; column 0 + ld x0j, Y + eor x0j, t0j + ldd x1j, Y + ROW_INBYTES + Sbox x0j, x1j, x20, x30 + st Y+, x0j + lsl x1j ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES - 1, x1j + + ; column 1 + ld x0j, Y + ldd x1j, Y + ROW_INBYTES + ldd x2j, Y + 2 * ROW_INBYTES + Sbox x0j, x1j, x2j, x31 + st Y+, x0j + rol x1j ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES - 1, x1j + ldd t0j, Y + 2 * ROW_INBYTES + 1 + std Y + 2 * ROW_INBYTES + 1, x2j + mov x2j, t0j + + ; column 2, 3 + TwoColumns x22, x32, x33 + ; column 4, 5 + TwoColumns x24, x34, x35 + ; column 6, 7 + TwoColumns x26, x36, x37 + ; column 8, 9 + TwoColumns x28, x38, x39 + ; column 10, 11 + TwoColumns x2a, x3a, x3b + ; column 12, 13 + TwoColumns x2c, x3c, x3d + + ; column 14 + ld x0j, Y + ldd x1j, Y + ROW_INBYTES + Sbox x0j, x1j, x2e, x3e + st Y+, x0j + rol x1j ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES - 1, x1j + + ; column 15 + ld x0j, Y + ldd x1j, Y + ROW_INBYTES + Sbox x0j, x1j, x2j, x3f + st Y+, x0j + rol x1j ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES - 1, x1j + + ld x1j, Y + eor t0j, t0j + adc x1j, t0j + st Y, x1j + std Y + ROW_INBYTES + 1, x2j + + ; f e d c b a 9 8 7 6 5 4 3 2 1 0 + ; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x- 0 + ; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x' 0 + ; -- -- -- -- -- -- -- -- -- -- -- -- -- x- -- 
-- 2 + ; -- -- -- -- -- -- -- -- -- -- -- -- x' -- -- -- 3 + ; c b a 9 8 7 6 5 4 3 2 1 0 f e d + ; x2e x2c x2a x28 x26 x24 x22 x20 => x2c x2a x28 x26 x24 x22 x20 x2e + ;mov t0j, x2e + ;mov x2e, x2c + ;mov x2c, x2a + ;mov x2a, x28 + ;mov x28, x26 + ;mov x26, x24 + ;mov x24, x22 + ;mov x22, x20 + ;mov x20, t0j + ; an intentionally arrangement of registers to facilitate movw + movw t0j, x26 ; t1j:t0j <= x2e:x26 + movw x26, x24 ; x2e:x26 <= x2c:x24 + movw x24, x22 ; x2c:x24 <= x2a:x22 + movw x22, x20 ; x2a:x22 <= x28:x20 + mov x20, t1j ; x20 <= t1j + mov x28, t0j ; x28 <= t0j + + ; <<< 1 + mov t0j, x3f + rol t0j + rol x30 + rol x31 + rol x32 + rol x33 + rol x34 + rol x35 + rol x36 + rol x37 + rol x38 + rol x39 + rol x3a + rol x3b + rol x3c + rol x3d + rol x3e + rol x3f + ; <<< 24 + ; f e d c b a 9 8 7 6 5 4 3 2 1 0 => + ; c b a 9 8 7 6 5 4 3 2 1 0 f e d + ; mov x3j, x30 + ; mov x30, x3d + ; mov x3d, x3a + ; mov x3a, x37 + ; mov x37, x34 + ; mov x34, x31 + ; mov x31, x3e + ; mov x3e, x3b + ; mov x3b, x38 + ; mov x38, x35 + ; mov x35, x32 + ; mov x32, x3f + ; mov x3f, x3c + ; mov x3c, x39 + ; mov x39, x36 + ; mov x36, x33 + ; mov x33, x3j + ; an intentionally arrangement of registers to facilitate movw + ; x30 r8 + ; x3d r10 + ; x3a r12 + ; x37 r14 + ; x34 r16 + ; x31 r18 + ; x3e r20 + ; x3b r22 + ; x38 r9 + ; x35 r11 + ; x32 r13 + ; x3f r15 + ; x3c r17 + ; x39 r19 + ; x36 r21 + ; x33 r23 + movw t0j, x30 ; t1j:t0j <= x38:x30 + movw x30, x3d ; x38:x30 <= x35:x3d + movw x3d, x3a ; x35:x3d <= x32:x3a + movw x3a, x37 ; x32:x3a <= x3f:x37 + movw x37, x34 ; x3f:x37 <= x3c:x34 + movw x34, x31 ; x3c:x34 <= x39:x31 + movw x31, x3e ; x39:x31 <= x36:x3e + movw x3e, x3b ; x36:x3e <= x33:x3b + mov x3b, t1j ; x3b <= x38 + mov x33, t0j ; x33 <= x30 + + pop rcnt + dec rcnt + push rcnt + breq round_loop_end + rjmp round_loop_start + +round_loop_end: + pop rcnt + + ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES) + std Y + 0x00, x20 + std Y + 0x02, x22 
+ std Y + 0x04, x24 + std Y + 0x06, x26 + std Y + 0x08, x28 + std Y + 0x0a, x2a + std Y + 0x0c, x2c + std Y + 0x0e, x2e + adiw YL, ROW_INBYTES + st Y+, x30 + st Y+, x31 + st Y+, x32 + st Y+, x33 + st Y+, x34 + st Y+, x35 + st Y+, x36 + st Y+, x37 + st Y+, x38 + st Y+, x39 + st Y+, x3a + st Y+, x3b + st Y+, x3c + st Y+, x3d + st Y+, x3e + st Y+, x3f + + POP_CONFLICT +ret + +.section .text +#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) +RC_LFSR7: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03 +.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a +.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c +.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b +.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a +.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f +.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43 +.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09 +.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36 +.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37 +.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31 +.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25 +.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c +.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a +.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f +.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00 +RC_LFSR8: +.byte 0x01, 0x02, 0x04, 0x08, 0x11, 0x23, 0x47, 0x8e +.byte 0x1c, 0x38, 0x71, 0xe2, 0xc4, 0x89, 0x12, 0x25 +.byte 0x4b, 0x97, 0x2e, 0x5c, 0xb8, 0x70, 0xe0, 0xc0 +.byte 0x81, 0x03, 0x06, 0x0c, 0x19, 0x32, 0x64, 0xc9 +.byte 0x92, 0x24, 0x49, 0x93, 0x26, 0x4d, 0x9b, 0x37 +.byte 0x6e, 0xdc, 0xb9, 0x72, 0xe4, 0xc8, 0x90, 0x20 +.byte 0x41, 0x82, 0x05, 0x0a, 0x15, 0x2b, 0x56, 0xad +.byte 0x5b, 0xb6, 0x6d, 0xda, 0xb5, 0x6b, 0xd6, 0xac +.byte 0x59, 0xb2, 0x65, 0xcb, 0x96, 0x2c, 0x58, 0xb0 +.byte 0x61, 0xc3, 0x87, 0x0f, 0x1f, 0x3e, 0x7d, 0xfb +.byte 0xf6, 0xed, 0xdb, 0xb7, 0x6f, 0xde, 0xbd, 0x7a +.byte 0xf5, 0xeb, 0xd7, 0xae, 0x5d, 0xba, 0x74, 0xe8 +.byte 0xd1, 0xa2, 0x44, 
0x88, 0x10, 0x21, 0x43, 0x86 +.byte 0x0d, 0x1b, 0x36, 0x6c, 0xd8, 0xb1, 0x63, 0xc7 +.byte 0x8f, 0x1e, 0x3c, 0x79, 0xf3, 0xe7, 0xce, 0x9c +.byte 0x39, 0x73, 0xe6, 0xcc, 0x98, 0x31, 0x62, 0xc5 +.byte 0x8b, 0x16, 0x2d, 0x5a, 0xb4, 0x69, 0xd2, 0xa4 +.byte 0x48, 0x91, 0x22, 0x45, 0x8a, 0x14, 0x29, 0x52 +.byte 0xa5, 0x4a, 0x95, 0x2a, 0x54, 0xa9, 0x53, 0xa7 +.byte 0x4e, 0x9d, 0x3b, 0x77, 0xee, 0xdd, 0xbb, 0x76 +.byte 0xec, 0xd9, 0xb3, 0x67, 0xcf, 0x9e, 0x3d, 0x7b +.byte 0xf7, 0xef, 0xdf, 0xbf, 0x7e, 0xfd, 0xfa, 0xf4 +.byte 0xe9, 0xd3, 0xa6, 0x4c, 0x99, 0x33, 0x66, 0xcd +.byte 0x9a, 0x35, 0x6a, 0xd4, 0xa8, 0x51, 0xa3, 0x46 +.byte 0x8c, 0x18, 0x30, 0x60, 0xc1, 0x83, 0x07, 0x0e +.byte 0x1d, 0x3a, 0x75, 0xea, 0xd5, 0xaa, 0x55, 0xab +.byte 0x57, 0xaf, 0x5f, 0xbe, 0x7c, 0xf9, 0xf2, 0xe5 +.byte 0xca, 0x94, 0x28, 0x50, 0xa1, 0x42, 0x84, 0x09 +.byte 0x13, 0x27, 0x4f, 0x9f, 0x3f, 0x7f, 0xff, 0xfe +.byte 0xfc, 0xf8, 0xf0, 0xe1, 0xc2, 0x85, 0x0b, 0x17 +.byte 0x2f, 0x5e, 0xbc, 0x78, 0xf1, 0xe3, 0xc6, 0x8d +.byte 0x1a, 0x34, 0x68, 0xd0, 0xa0, 0x40, 0x80, 0x00 +#elif defined(CRYPTO_AEAD) +RC_LFSR7: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03 +.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a +.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c +.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b +.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a +.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f +.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43 +.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09 +.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36 +.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37 +.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31 +.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25 +.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c +.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a +.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f +.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00 +#else +RC_LFSR8: +.byte 0x01, 0x02, 
0x04, 0x08, 0x11, 0x23, 0x47, 0x8e +.byte 0x1c, 0x38, 0x71, 0xe2, 0xc4, 0x89, 0x12, 0x25 +.byte 0x4b, 0x97, 0x2e, 0x5c, 0xb8, 0x70, 0xe0, 0xc0 +.byte 0x81, 0x03, 0x06, 0x0c, 0x19, 0x32, 0x64, 0xc9 +.byte 0x92, 0x24, 0x49, 0x93, 0x26, 0x4d, 0x9b, 0x37 +.byte 0x6e, 0xdc, 0xb9, 0x72, 0xe4, 0xc8, 0x90, 0x20 +.byte 0x41, 0x82, 0x05, 0x0a, 0x15, 0x2b, 0x56, 0xad +.byte 0x5b, 0xb6, 0x6d, 0xda, 0xb5, 0x6b, 0xd6, 0xac +.byte 0x59, 0xb2, 0x65, 0xcb, 0x96, 0x2c, 0x58, 0xb0 +.byte 0x61, 0xc3, 0x87, 0x0f, 0x1f, 0x3e, 0x7d, 0xfb +.byte 0xf6, 0xed, 0xdb, 0xb7, 0x6f, 0xde, 0xbd, 0x7a +.byte 0xf5, 0xeb, 0xd7, 0xae, 0x5d, 0xba, 0x74, 0xe8 +.byte 0xd1, 0xa2, 0x44, 0x88, 0x10, 0x21, 0x43, 0x86 +.byte 0x0d, 0x1b, 0x36, 0x6c, 0xd8, 0xb1, 0x63, 0xc7 +.byte 0x8f, 0x1e, 0x3c, 0x79, 0xf3, 0xe7, 0xce, 0x9c +.byte 0x39, 0x73, 0xe6, 0xcc, 0x98, 0x31, 0x62, 0xc5 +.byte 0x8b, 0x16, 0x2d, 0x5a, 0xb4, 0x69, 0xd2, 0xa4 +.byte 0x48, 0x91, 0x22, 0x45, 0x8a, 0x14, 0x29, 0x52 +.byte 0xa5, 0x4a, 0x95, 0x2a, 0x54, 0xa9, 0x53, 0xa7 +.byte 0x4e, 0x9d, 0x3b, 0x77, 0xee, 0xdd, 0xbb, 0x76 +.byte 0xec, 0xd9, 0xb3, 0x67, 0xcf, 0x9e, 0x3d, 0x7b +.byte 0xf7, 0xef, 0xdf, 0xbf, 0x7e, 0xfd, 0xfa, 0xf4 +.byte 0xe9, 0xd3, 0xa6, 0x4c, 0x99, 0x33, 0x66, 0xcd +.byte 0x9a, 0x35, 0x6a, 0xd4, 0xa8, 0x51, 0xa3, 0x46 +.byte 0x8c, 0x18, 0x30, 0x60, 0xc1, 0x83, 0x07, 0x0e +.byte 0x1d, 0x3a, 0x75, 0xea, 0xd5, 0xaa, 0x55, 0xab +.byte 0x57, 0xaf, 0x5f, 0xbe, 0x7c, 0xf9, 0xf2, 0xe5 +.byte 0xca, 0x94, 0x28, 0x50, 0xa1, 0x42, 0x84, 0x09 +.byte 0x13, 0x27, 0x4f, 0x9f, 0x3f, 0x7f, 0xff, 0xfe +.byte 0xfc, 0xf8, 0xf0, 0xe1, 0xc2, 0x85, 0x0b, 0x17 +.byte 0x2f, 0x5e, 0xbc, 0x78, 0xf1, 0xe3, 0xc6, 0x8d +.byte 0x1a, 0x34, 0x68, 0xd0, 0xa0, 0x40, 0x80, 0x00 +#endif \ No newline at end of file diff --git a/knot/Implementations/crypto_aead/knot192/avr8_speed/permutation.h b/knot/Implementations/crypto_aead/knot192/avr8_speed/permutation.h new file mode 100644 index 0000000..e6c9793 --- /dev/null +++ 
b/knot/Implementations/crypto_aead/knot192/avr8_speed/permutation.h @@ -0,0 +1,45 @@
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;

;
; ============================================
; R E G I S T E R D E F I N I T I O N S
; ============================================
;

#define mclen r16
#define radlen r17
; NOTE(review): tcnt aliases radlen (both r17) -- presumably the two are
; never live at the same time; confirm at the call sites before reusing.
#define tcnt r17
#define tmp0 r20
#define tmp1 r21
#define cnt0 r22
; rn holds the number of permutation rounds requested by the caller
; (loaded with NR_0 / NR_i / NR_f before each rcall Permutation).
#define rn r23
#define rate r24


; AEDH = 0b000: for authenticate AD
; AEDH = 0b001: for encryption
; AEDH = 0b011: for decryption
; AEDH = 0b100: for hash
#define AEDH r25
; rcnt is the round down-counter used inside Permutation (copied from rn).
#define rcnt r26

; Pull in the permutation body matching the configured state width.
#if (STATE_INBITS==256)
#include "knot256.h"
#elif (STATE_INBITS==384)
#include "knot384.h"
#elif (STATE_INBITS==512)
#include "knot512.h"
#else
#error "Not specified key size and state size"
#endif


diff --git a/knot/Implementations/crypto_aead/knot256/avr8_speed/api.h b/knot/Implementations/crypto_aead/knot256/avr8_speed/api.h
new file mode 100644
index 0000000..5c0f032
--- /dev/null
+++ b/knot/Implementations/crypto_aead/knot256/avr8_speed/api.h
@@ -0,0 +1,5 @@
#define CRYPTO_KEYBYTES 32
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 32
#define CRYPTO_ABYTES 32
#define CRYPTO_NOOVERLAP 1
diff --git a/knot/Implementations/crypto_aead/knot256/avr8_speed/assist.h b/knot/Implementations/crypto_aead/knot256/avr8_speed/assist.h
new file mode 100644
index 0000000..f95a717
--- /dev/null
+++ b/knot/Implementations/crypto_aead/knot256/avr8_speed/assist.h
@@ -0,0 +1,86 @@
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 
1.1 2020 by KNOT Team * +; ********************************************** +; +.macro PUSH_CONFLICT + push r16 + push r17 + push r18 + push r19 + + push r23 + push r24 + push r25 + push r26 + push r27 + push r28 + push r29 + push r30 + push r31 +.endm + +.macro POP_CONFLICT + pop r31 + pop r30 + pop r29 + pop r28 + pop r27 + pop r26 + pop r25 + pop r24 + pop r23 + + pop r19 + pop r18 + pop r17 + pop r16 +.endm + +.macro PUSH_ALL + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r28 + push r29 +.endm + +.macro POP_ALL + pop r29 + pop r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + clr r1 +.endm \ No newline at end of file diff --git a/knot/Implementations/crypto_aead/knot256/avr8_speed/config.h b/knot/Implementations/crypto_aead/knot256/avr8_speed/config.h new file mode 100644 index 0000000..8fb6034 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot256/avr8_speed/config.h @@ -0,0 +1,131 @@ +#ifndef __CONFIG_H__ +#define __CONFIG_H__ + +#define CRYPTO_AEAD +//#define CRYPTO_HASH + +#define MAX_MESSAGE_LENGTH 128 + +#define STATE_INBITS 512 +/* For CRYPTO_AEAD */ +#define CRYPTO_KEYBITS 256 +/* For CRYPTO_HASH */ +#define CRYPTO_BITS 512 + +#define STATE_INBYTES ((STATE_INBITS + 7) / 8) +#define ROW_INBITS ((STATE_INBITS + 3) / 4) +#define ROW_INBYTES ((ROW_INBITS + 7) / 8) + +/* For CRYPTO_AEAD */ +#define CRYPTO_KEYBYTES ((CRYPTO_KEYBITS + 7) / 8) +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES CRYPTO_KEYBYTES +#define CRYPTO_ABYTES CRYPTO_KEYBYTES +#define CRYPTO_NOOVERLAP 1 + +#define MAX_ASSOCIATED_DATA_LENGTH 32 +#define MAX_CIPHER_LENGTH (MAX_MESSAGE_LENGTH + CRYPTO_ABYTES) + +#define TAG_MATCH 0 +#define TAG_UNMATCH -1 +#define OTHER_FAILURES -2 + +/* For CRYPTO_HASH */ +#define CRYPTO_BYTES ((CRYPTO_BITS + 7) / 8) + 
+ + +#define DOMAIN_BITS 0x80 +#define PAD_BITS 0x01 +#define S384_R192_BITS 0x80 + +#if (STATE_INBITS==256) +#define C1 1 +#define C2 8 +#define C3 25 +#elif (STATE_INBITS==384) +#define C1 1 +#define C2 8 +#define C3 55 +#elif (STATE_INBITS==512) +#define C1 1 +#define C2 16 +#define C3 25 +#else +#error "Not specified state size" +#endif + +#ifdef CRYPTO_AEAD +/* For CRYPTO_AEAD */ +#define KEY_INBITS (CRYPTO_KEYBYTES * 8) +#define KEY_INBYTES (CRYPTO_KEYBYTES) + +#define NONCE_INBITS (CRYPTO_NPUBBYTES * 8) +#define NONCE_INBYTES (CRYPTO_NPUBBYTES) + +#define TAG_INBITS (CRYPTO_ABYTES * 8) +#define TAG_INBYTES (CRYPTO_ABYTES) + +#if (KEY_INBITS==128) && (STATE_INBITS==256) +#define RATE_INBITS 64 +#define NR_0 52 +#define NR_i 28 +#define NR_f 32 +#elif (KEY_INBITS==128) && (STATE_INBITS==384) +#define RATE_INBITS 192 +#define NR_0 76 +#define NR_i 28 +#define NR_f 32 +#elif (KEY_INBITS==192) && (STATE_INBITS==384) +#define RATE_INBITS 96 +#define NR_0 76 +#define NR_i 40 +#define NR_f 44 +#elif (KEY_INBITS==256) && (STATE_INBITS==512) +#define RATE_INBITS 128 +#define NR_0 100 +#define NR_i 52 +#define NR_f 56 +#else +#error "Not specified key size and state size" +#endif + +#define RATE_INBYTES ((RATE_INBITS + 7) / 8) +#define SQUEEZE_RATE_INBYTES TAG_INBYTES + +#endif + +#ifdef CRYPTO_HASH +/* For CRYPTO_HASH */ +#define HASH_DIGEST_INBITS (CRYPTO_BYTES * 8) + +#if (HASH_DIGEST_INBITS==256) && (STATE_INBITS==256) +#define HASH_RATE_INBITS 32 +#define HASH_SQUEEZE_RATE_INBITS 128 +#define NR_h 68 +#elif (HASH_DIGEST_INBITS==256) && (STATE_INBITS==384) +#define HASH_RATE_INBITS 128 +#define HASH_SQUEEZE_RATE_INBITS 128 +#define NR_h 80 +#elif (HASH_DIGEST_INBITS==384) && (STATE_INBITS==384) +#define HASH_RATE_INBITS 48 +#define HASH_SQUEEZE_RATE_INBITS 192 +#define NR_h 104 +#elif (HASH_DIGEST_INBITS==512) && (STATE_INBITS==512) +#define HASH_RATE_INBITS 64 +#define HASH_SQUEEZE_RATE_INBITS 256 +#define NR_h 140 +#else +#error "Not specified hash digest size 
and state size" +#endif + +#define HASH_RATE_INBYTES ((HASH_RATE_INBITS + 7) / 8) +#define HASH_SQUEEZE_RATE_INBYTES ((HASH_SQUEEZE_RATE_INBITS + 7) / 8) + +#endif + +#define TAG_MATCH 0 +#define TAG_UNMATCH -1 +#define OTHER_FAILURES -2 + +#endif \ No newline at end of file diff --git a/knot/Implementations/crypto_aead/knot256/avr8_speed/crypto_aead.h b/knot/Implementations/crypto_aead/knot256/avr8_speed/crypto_aead.h new file mode 100644 index 0000000..cd820d3 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot256/avr8_speed/crypto_aead.h @@ -0,0 +1,26 @@ +#ifdef __cplusplus +extern "C" { +#endif + +int crypto_aead_encrypt( + unsigned char *c,unsigned long long *clen, + const unsigned char *m,unsigned long long mlen, + const unsigned char *ad,unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k + ); + + +int crypto_aead_decrypt( + unsigned char *m,unsigned long long *outputmlen, + unsigned char *nsec, + const unsigned char *c,unsigned long long clen, + const unsigned char *ad,unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k + ); + +#ifdef __cplusplus +} +#endif diff --git a/knot/Implementations/crypto_aead/knot256/avr8_speed/encrypt.c b/knot/Implementations/crypto_aead/knot256/avr8_speed/encrypt.c new file mode 100644 index 0000000..baf0a3b --- /dev/null +++ b/knot/Implementations/crypto_aead/knot256/avr8_speed/encrypt.c @@ -0,0 +1,106 @@ +#include +#include +#include +#include +#include "config.h" + +extern void crypto_aead_encrypt_asm( + unsigned char *c, + const unsigned char *m, + unsigned char mlen, + const unsigned char *ad, + unsigned char adlen, + const unsigned char *npub, + const unsigned char *k + ); + +extern int crypto_aead_decrypt_asm( + unsigned char *m, + const unsigned char *c, + unsigned char clen, + const unsigned char *ad, + unsigned char adlen, + const unsigned char *npub, + const unsigned char *k + ); + +extern void crypto_hash_asm( + unsigned char 
*out, + const unsigned char *in, + unsigned char inlen + ); + + +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k + ) +{ + /* + ... + ... the code for the cipher implementation goes here, + ... generating a ciphertext c[0],c[1],...,c[*clen-1] + ... from a plaintext m[0],m[1],...,m[mlen-1] + ... and associated data ad[0],ad[1],...,ad[adlen-1] + ... and nonce npub[0],npub[1],.. + ... and secret key k[0],k[1],... + ... the implementation shall not use nsec + ... + ... return 0; + */ + + (void)nsec; + + crypto_aead_encrypt_asm(c, m, mlen, ad, adlen, npub, k); + + *clen = mlen + TAG_INBYTES; + return 0; +} + + + +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k + ) +{ + /* + ... + ... the code for the AEAD implementation goes here, + ... generating a plaintext m[0],m[1],...,m[*mlen-1] + ... and secret message number nsec[0],nsec[1],... + ... from a ciphertext c[0],c[1],...,c[clen-1] + ... and associated data ad[0],ad[1],...,ad[adlen-1] + ... and nonce number npub[0],npub[1],... + ... and secret key k[0],k[1],... + ... + ... 
return 0; + */ + unsigned long long mlen_; + unsigned char tag_is_match; + + (void)nsec; + if (clen < CRYPTO_ABYTES) { + return -1; + } + mlen_ = clen - CRYPTO_ABYTES; + + tag_is_match = crypto_aead_decrypt_asm(m, c, mlen_, ad, adlen, npub, k); + + if (tag_is_match != 0) + { + memset(m, 0, (size_t)mlen_); + return -1; + } + + *mlen = mlen_; + return 0; +} \ No newline at end of file diff --git a/knot/Implementations/crypto_aead/knot256/avr8_speed/encrypt_core.S b/knot/Implementations/crypto_aead/knot256/avr8_speed/encrypt_core.S new file mode 100644 index 0000000..bd74f93 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot256/avr8_speed/encrypt_core.S @@ -0,0 +1,555 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.1 2020 by KNOT Team * +; ********************************************** +; + +; +; ============================================ +; S R A M D E F I N I T I O N S +; ============================================ +; +#include +#include "config.h" + +.section .noinit + SRAM_STATE: .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#if (STATE_INBYTES > 32) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif +#if (STATE_INBYTES > 48) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif + SRAM_MESSAGE_OUT_ADDR: .BYTE 0, 0 + SRAM_MESSAGE_IN_ADDR: .BYTE 0, 0 + SRAM_MESSAGE_IN_LEN: .BYTE 0, 0 +#ifdef CRYPTO_AEAD +; For CRYPTO_AEAD + SRAM_ASSOCIATED_DATA_ADDR: .BYTE 0, 0 + SRAM_ADLEN: .BYTE 0, 0 + SRAM_NONCE_ADDR: .BYTE 0, 0 + SRAM_KEY_ADDR: .BYTE 0, 0 + + SRAM_ADDITIONAL: + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#if (CRYPTO_ABYTES > 16) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif +#if (CRYPTO_ABYTES > 24) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif + 
#endif

.section .text

#include "permutation.h"

; Absorb one full rate-sized block into the state for encryption or
; decryption, emitting the ciphertext/plaintext block through Z.
; require YH:YL be the address of the current associated data/cipher/message block
; for enc and dec, store ciphertext or plaintext
; require ZH:ZL be the address of the current cipher/message block
.macro XOR_to_State_ENCDEC
	ldi XH, hi8(SRAM_STATE)
	ldi XL, lo8(SRAM_STATE)
	mov cnt0, rate
XOR_to_State_loop_ENCDEC:
	ld tmp0, Y+ ; plaintext/ciphertext
	ld tmp1, X ; state
	eor tmp1, tmp0 ; ciphertext/plaintext
	st Z+, tmp1 ; store ciphertext/plaintext
	sbrc AEDH, 1 ; test auth/enc or dec, if AEDH[1] == 0, skip replace state byte
	mov tmp1, tmp0 ; if dec, replace state
	st X+, tmp1 ; store state byte
	dec cnt0
	brne XOR_to_State_loop_ENCDEC
; YH:YL are now the address of the next associated data block
.endm

; Absorb one full rate-sized block of associated data into the state.
; Same as XOR_to_State_ENCDEC but produces no output block through Z.
; require YH:YL be the address of the current associated data/cipher/message block
; for enc and dec, store ciphertext or plaintext
; require ZH:ZL be the address of the current cipher/message block
.macro XOR_to_State_AUTH
	ldi XH, hi8(SRAM_STATE)
	ldi XL, lo8(SRAM_STATE)
	mov cnt0, rate
XOR_to_State_loop_AUTH:
	ld tmp0, Y+ ; plaintext/ciphertext
	ld tmp1, X ; state
	eor tmp1, tmp0 ; ciphertext/plaintext
	st X+, tmp1 ; store state byte
	dec cnt0
	brne XOR_to_State_loop_AUTH
; YH:YL are now the address of the next associated data block
.endm



; Absorb the final, partial block (cnt0 < rate bytes) and then XOR the
; PAD_BITS padding bit into the state byte following the data.
; require YH:YL pointed to the input data
; require ZH:ZL pointed to the output data
; require cnt0 contains the number of bytes in source data
; require number of bytes in source data less than rate, i.e., 0 <= cnt0 < rate
;
; the 0th bit in AEDH is used to distinguish (auth AD) or (enc/dec M/C):
; AEDH[0] = 0 for (auth AD), AEDH[0] = 1 for (enc/dec M/C)
; the 1st bit in AEDH is used to distinguish (auth AD/enc M) or (dec C):
; AEDH[1] = 0 for (auth AD/enc M), AEDH[1] = 1 for (dec C)
; AEDH = 0b000 for (auth AD)
; AEDH = 0b001 for (enc M)
; AEDH = 0b011 for (dec C)
Pad_XOR_to_State:
	ldi XH, hi8(SRAM_STATE)
	ldi 
XL, lo8(SRAM_STATE) + tst cnt0 + breq XOR_padded_data +XOR_source_data_loop: + ld tmp0, Y+ ; plaintext/ciphertext + ld tmp1, X ; state + eor tmp1, tmp0 ; ciphertext/plaintext + sbrc AEDH, 0 ; test auth or enc/dec, if AEDH[0] == 0, skip store result + st Z+, tmp1 ; store ciphertext/plaintext + sbrc AEDH, 1 ; test auth/enc or dec, if AEDH[1] == 0, skip repalce state byte + mov tmp1, tmp0 ; if dec, replace state + st X+, tmp1 ; store state byte + dec cnt0 + brne XOR_source_data_loop +XOR_padded_data: + ldi tmp0, PAD_BITS + ld tmp1, X + eor tmp1, tmp0 + st X, tmp1 +ret + +AddDomain: + ldi XH, hi8(SRAM_STATE + STATE_INBYTES - 1) + ldi XL, lo8(SRAM_STATE + STATE_INBYTES - 1) + ldi tmp0, DOMAIN_BITS + ld tmp1, X + eor tmp0, tmp1 + st X, tmp0 +ret + +; require ZH:ZL be the address of the destination +EXTRACT_from_State: + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + mov tmp1, rate +EXTRACT_from_State_loop: + ld tmp0, X+ + st Z+, tmp0 + dec tmp1 + brne EXTRACT_from_State_loop +ret + +AUTH: + tst radlen + breq AUTH_end + + cp radlen, rate + brlo auth_ad_padded_block + +auth_ad_loop: + XOR_to_State_AUTH + rcall Permutation + sub radlen, rate + cp radlen, rate + brlo auth_ad_padded_block + rjmp auth_ad_loop + +auth_ad_padded_block: + mov cnt0, radlen + rcall Pad_XOR_to_State + rcall Permutation + +AUTH_end: +ret + +#ifdef CRYPTO_AEAD +Initialization: + ldi rn, NR_0 + ldi XL, lo8(SRAM_STATE) + ldi XH, hi8(SRAM_STATE) + + lds YH, SRAM_NONCE_ADDR + lds YL, SRAM_NONCE_ADDR + 1 + ldi cnt0, CRYPTO_NPUBBYTES +load_nonce_loop: + ld tmp0, Y+ + st X+, tmp0 + dec cnt0 + brne load_nonce_loop + + lds YH, SRAM_KEY_ADDR + lds YL, SRAM_KEY_ADDR + 1 + ldi cnt0, CRYPTO_KEYBYTES +load_key_loop: + ld tmp0, Y+ + st X+, tmp0 + dec cnt0 + brne load_key_loop + +#if (STATE_INBITS==384) && (RATE_INBITS==192) + ldi cnt0, (STATE_INBYTES - CRYPTO_NPUBBYTES - CRYPTO_KEYBYTES - 1) + clr tmp0 +empty_state_loop: + st X+, tmp0 + dec cnt0 + brne empty_state_loop + ldi tmp0, S384_R192_BITS + st X+, tmp0 
+#endif + + rcall Permutation +ret + +ENC: + tst mclen + breq ENC_end + + cp mclen, rate + brlo enc_padded_block + +enc_loop: + XOR_to_State_ENCDEC + ldi rn, NR_i + rcall Permutation + sub mclen, rate + cp mclen, rate + brlo enc_padded_block + rjmp enc_loop + +enc_padded_block: + mov cnt0, mclen + rcall Pad_XOR_to_State +ENC_end: +ret + +Finalization: + ldi rate, SQUEEZE_RATE_INBYTES + ldi rn, NR_f + rcall Permutation + rcall EXTRACT_from_State +ret + +; void crypto_aead_encrypt_asm( +; unsigned char *c, +; const unsigned char *m, +; unsigned long long mlen, +; const unsigned char *ad, +; unsigned long long adlen, +; const unsigned char *npub, +; const unsigned char *k +; ) +; +; unsigned char *c, is passed in r24:r25 +; const unsigned char *m, is passed in r22:r23 +; unsigned long long mlen, is passed in r20:r21, only LSB (r20) is used +; const unsigned char *ad, is passed in r18:r19 +; unsigned long long adlen, is passed in r16:r17, only LSB (r16) is used +; const unsigned char *npub, is passed in r14:r15 +; const unsigned char *k is passed in r12:r13 +.global crypto_aead_encrypt_asm +crypto_aead_encrypt_asm: + PUSH_ALL + ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR) + ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR) + st X+, r25 ;store cipher address in SRAM_MESSAGE_OUT_ADDR + st X+, r24 + st X+, r23 ;store message address in SRAM_MESSAGE_IN_ADDR + st X+, r22 + st X+, r21 ;store message length in SRAM_MESSAGE_IN_LEN + st X+, r20 + st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR + st X+, r18 + st X+, r17 ;store associated data length in SRAM_ADLEN + st X+, r16 + st X+, r15 ;store nonce address in SRAM_NONCE_ADDR + st X+, r14 + st X+, r13 ;store key address in SRAM_KEY_ADDR + st X+, r12 + mov radlen, r16 + mov mclen, r20 + + rcall Initialization + + ldi rn, NR_i + ldi rate, RATE_INBYTES + ldi AEDH, 0b000 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C) + lds YH, SRAM_ASSOCIATED_DATA_ADDR + lds YL, SRAM_ASSOCIATED_DATA_ADDR + 1 + rcall 
AUTH + rcall AddDomain + ldi AEDH, 0b001 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C) + lds YH, SRAM_MESSAGE_IN_ADDR + lds YL, SRAM_MESSAGE_IN_ADDR + 1 + lds ZH, SRAM_MESSAGE_OUT_ADDR + lds ZL, SRAM_MESSAGE_OUT_ADDR + 1 + rcall ENC + rcall Finalization + POP_ALL +ret + +; int crypto_aead_decrypt_asm( +; unsigned char *m, +; const unsigned char *c, +; unsigned long long clen, +; const unsigned char *ad, +; unsigned long long adlen, +; const unsigned char *npub, +; const unsigned char *k +; ) +; +; unsigned char *m, is passed in r24:r25 +; const unsigned char *c, is passed in r22:r23 +; unsigned long long clen, is passed in r20:r21, only LSB (r20) is used +; const unsigned char *ad, is passed in r18:r19 +; unsigned long long adlen, is passed in r16:r17, only LSB (r16) is used +; const unsigned char *npub, is passed in r14:r15 +; const unsigned char *k is passed in r12:r13 +.global crypto_aead_decrypt_asm +crypto_aead_decrypt_asm: + PUSH_ALL + ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR) + ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR) + st X+, r25 ;store message address in SRAM_MESSAGE_OUT_ADDR + st X+, r24 + st X+, r23 ;store cipher address in SRAM_MESSAGE_IN_ADDR + st X+, r22 + st X+, r21 ;store cipher length in SRAM_MESSAGE_IN_LEN + st X+, r20 + st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR + st X+, r18 + st X+, r17 ;store associated data length in SRAM_ADLEN + st X+, r16 + st X+, r15 ;store nonce address in SRAM_NONCE_ADDR + st X+, r14 + st X+, r13 ;store key address in SRAM_KEY_ADDR + st X+, r12 + mov radlen, r16 + mov mclen, r20 + + rcall Initialization + + ldi rn, NR_i + ldi rate, RATE_INBYTES + ldi AEDH, 0b000 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C) + lds YH, SRAM_ASSOCIATED_DATA_ADDR + lds YL, SRAM_ASSOCIATED_DATA_ADDR + 1 + rcall AUTH + rcall AddDomain + ldi AEDH, 0b011 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C) + lds YH, 
SRAM_MESSAGE_IN_ADDR + lds YL, SRAM_MESSAGE_IN_ADDR + 1 + lds ZH, SRAM_MESSAGE_OUT_ADDR + lds ZL, SRAM_MESSAGE_OUT_ADDR + 1 + rcall ENC + + ldi ZH, hi8(SRAM_ADDITIONAL) + ldi ZL, lo8(SRAM_ADDITIONAL) + rcall Finalization + + sbiw ZL, CRYPTO_ABYTES + ldi cnt0, CRYPTO_ABYTES +compare_tag: + ld tmp0, Z+ + ld tmp1, Y+ + cp tmp0, tmp1 + brne return_tag_not_match + dec cnt0 + brne compare_tag + rjmp return_tag_match + +return_tag_not_match: + ldi r25, 0xFF + ldi r24, 0xFF + rjmp crypto_aead_decrypt_end + +return_tag_match: + clr r25 + clr r24 +crypto_aead_decrypt_end: + POP_ALL +ret + +; #ifdef CRYPTO_AEAD +#endif + + +#ifdef CRYPTO_HASH + +; void crypto_hash_asm( +; unsigned char *out, +; const unsigned char *in, +; unsigned long long inlen +; ) +; +; unsigned char *out, is passed in r24:r25 +; const unsigned char *in, is passed in r22:r23 +; unsigned long long inlen, is passed in r20:r21, only LSB (r20) is used +.global crypto_hash_asm +crypto_hash_asm: + PUSH_ALL + ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR) + ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR) + st X+, r25 ;store message address in SRAM_MESSAGE_OUT_ADDR + st X+, r24 + st X+, r23 ;store cipher address in SRAM_MESSAGE_IN_ADDR + st X+, r22 + st X+, r21 ;store cipher length in SRAM_MESSAGE_IN_LEN + st X+, r20 + mov mclen, r20 + + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) +#if (STATE_INBITS==384) && (HASH_RATE_INBITS==128) + ldi cnt0, STATE_INBYTES - 1 +#else + ldi cnt0, STATE_INBYTES +#endif + clr tmp0 +zero_state: + st X+, tmp0 + dec cnt0 + brne zero_state + +#if (STATE_INBITS==384) && (HASH_RATE_INBITS==128) + ldi tmp0, S384_R192_BITS + st X+, tmp0 +#endif + + ldi rn, NR_h + ldi AEDH, 0b100 + +HASH_ABSORBING: + mov radlen, mclen + tst radlen + breq EMPTY_M + ldi rate, HASH_RATE_INBYTES + lds YH, SRAM_MESSAGE_IN_ADDR + lds YL, SRAM_MESSAGE_IN_ADDR + 1 + rcall AUTH + rjmp HASH_SQUEEZING + +EMPTY_M: + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + ldi tmp0, PAD_BITS + ld tmp1, X + eor tmp1, tmp0 + st X, tmp1 + rcall 
Permutation + +HASH_SQUEEZING: + ldi rate, HASH_SQUEEZE_RATE_INBYTES + lds ZH, SRAM_MESSAGE_OUT_ADDR + lds ZL, SRAM_MESSAGE_OUT_ADDR + 1 + ldi tcnt, CRYPTO_BYTES +SQUEEZING_loop: + rcall EXTRACT_from_State + subi tcnt, HASH_SQUEEZE_RATE_INBYTES + breq HASH_SQUEEZING_end + rcall Permutation + rjmp SQUEEZING_loop +HASH_SQUEEZING_end: + POP_ALL +ret + +#endif + + +; Byte Order In AVR 8: +; KNOT-AEAD(128, 256, 64): +; N[ 0] AEAD_State[ 0] | Message[ 0] Perm_row_0[0] 0 Tag[ 0] +; N[ 1] AEAD_State[ 1] | Message[ 1] Perm_row_0[1] 0 Tag[ 1] +; N[ 2] AEAD_State[ 2] | Message[ 2] Perm_row_0[2] 0 Tag[ 2] +; N[ 3] AEAD_State[ 3] | Message[ 3] Perm_row_0[3] 0 Tag[ 3] +; N[ 4] AEAD_State[ 4] | Message[ 4] 0x01 Perm_row_0[4] 0 Tag[ 4] +; N[ 5] AEAD_State[ 5] | Message[ 5] 0x00 Perm_row_0[5] 0 Tag[ 5] +; N[ 6] AEAD_State[ 6] | Message[ 6] 0x00 Perm_row_0[6] 0 Tag[ 6] +; N[ 7] AEAD_State[ 7] | Message[ 7] 0x00 Perm_row_0[7] <<< 0 Tag[ 7] +; N[ 8] AEAD_State[ 8] | Perm_row_1[0] 1 +; N[ 9] AEAD_State[ 9] | Perm_row_1[1] 1 +; N[10] AEAD_State[10] | Perm_row_1[2] 1 +; N[11] AEAD_State[11] | Perm_row_1[3] 1 +; N[12] AEAD_State[12] | Perm_row_1[4] 1 +; N[13] AEAD_State[13] | Perm_row_1[5] 1 +; N[14] AEAD_State[14] | Perm_row_1[6] 1 +; N[15] AEAD_State[15] | Perm_row_1[7] <<< 1 +; K[ 0] AEAD_State[16] | Perm_row_2[0] 8 +; K[ 1] AEAD_State[17] | Perm_row_2[1] 8 +; K[ 2] AEAD_State[18] | Perm_row_2[2] 8 +; K[ 3] AEAD_State[19] | Perm_row_2[3] 8 +; K[ 4] AEAD_State[20] | Perm_row_2[4] 8 +; K[ 5] AEAD_State[21] | Perm_row_2[5] 8 +; K[ 6] AEAD_State[22] | Perm_row_2[6] 8 +; K[ 7] AEAD_State[23] | Perm_row_2[7] <<< 8 +; K[ 8] AEAD_State[24] | Perm_row_3[0] 25 +; K[ 9] AEAD_State[25] | Perm_row_3[1] 25 +; K[10] AEAD_State[26] | Perm_row_3[2] 25 +; K[11] AEAD_State[27] | Perm_row_3[3] 25 +; K[12] AEAD_State[28] | Perm_row_3[4] 25 +; K[13] AEAD_State[29] | Perm_row_3[5] 25 +; K[14] AEAD_State[30] | Perm_row_3[6] 25 +; K[15] AEAD_State[31] | ^0x80 Perm_row_3[7] <<< 25 +; +; +; KNOT-AEAD(128, 384, 
; Initialization
Perm_row_2[ 9] 8 +; 0x00 AEAD_State[34] | Perm_row_2[10] 8 +; 0x00 AEAD_State[35] | Perm_row_2[11] <<< 8 +; 0x00 AEAD_State[36] | Perm_row_3[ 0] 55 +; 0x00 AEAD_State[37] | Perm_row_3[ 1] 55 +; 0x00 AEAD_State[38] | Perm_row_3[ 2] 55 +; 0x00 AEAD_State[39] | Perm_row_3[ 3] 55 +; 0x00 AEAD_State[40] | Perm_row_3[ 4] 55 +; 0x00 AEAD_State[41] | Perm_row_3[ 5] 55 +; 0x00 AEAD_State[42] | Perm_row_3[ 6] 55 +; 0x00 AEAD_State[43] | Perm_row_3[ 7] 55 +; 0x00 AEAD_State[44] | Perm_row_3[ 8] 55 +; 0x00 AEAD_State[45] | Perm_row_3[ 9] 55 +; 0x00 AEAD_State[46] | Perm_row_3[10] 55 +; 0x00 ^0x80 AEAD_State[47] | ^0x80 Perm_row_3[11] <<< 55 diff --git a/knot/Implementations/crypto_aead/knot256/avr8_speed/knot256.h b/knot/Implementations/crypto_aead/knot256/avr8_speed/knot256.h new file mode 100644 index 0000000..f99f68b --- /dev/null +++ b/knot/Implementations/crypto_aead/knot256/avr8_speed/knot256.h @@ -0,0 +1,306 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.1 2020 by KNOT Team * +; ********************************************** +; +#define x10 r0 +#define x11 r1 +#define x12 r2 +#define x13 r3 +#define x14 r4 +#define x15 r5 +#define x16 r6 +#define x17 r7 + +; an intentionally arrangement of registers to facilitate movw +#define x20 r8 +#define x21 r10 +#define x22 r12 +#define x23 r14 +#define x24 r9 +#define x25 r11 +#define x26 r13 +#define x27 r15 + +; an intentionally arrangement of registers to facilitate movw +#define x30 r16 +#define x35 r18 +#define x32 r20 +#define x37 r22 +#define x34 r17 +#define x31 r19 +#define x36 r21 +#define x33 r23 + +#define t0j r24 +#define t1j r25 +#define x0j r27 + +#include "assist.h" + +.macro Sbox i0, i1, i2, i3 + mov t0j, \i1 + com \i0 + and \i1, \i0 + eor \i1, \i2 + or \i2, t0j + eor \i0, \i3 + eor \i2, \i0 + eor t0j, \i3 + and \i0, \i1 
+ eor \i3, \i1 + eor \i0, t0j + and t0j, \i2 + eor \i1, t0j +.endm + +Permutation: + PUSH_CONFLICT + mov rcnt, rn + + ldi YH, hi8(SRAM_STATE + ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + ROW_INBYTES) + ld x10, Y+ + ld x11, Y+ + ld x12, Y+ + ld x13, Y+ + ld x14, Y+ + ld x15, Y+ + ld x16, Y+ + ld x17, Y+ + ld x20, Y+ + ld x21, Y+ + ld x22, Y+ + ld x23, Y+ + ld x24, Y+ + ld x25, Y+ + ld x26, Y+ + ld x27, Y+ + ld x30, Y+ + ld x31, Y+ + ld x32, Y+ + ld x33, Y+ + ld x34, Y+ + ld x35, Y+ + ld x36, Y+ + ld x37, Y+ + +#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) + sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[2] = 1 for HASH + rjmp For_Hash +For_AEAD: + ldi ZL, lo8(RC_LFSR6) + ldi ZH, hi8(RC_LFSR6) + rjmp round_loop_start +For_Hash: + ldi ZL, lo8(RC_LFSR7) + ldi ZH, hi8(RC_LFSR7) +#elif defined(CRYPTO_AEAD) + ldi ZL, lo8(RC_LFSR6) + ldi ZH, hi8(RC_LFSR6) +#else + ldi ZL, lo8(RC_LFSR7) + ldi ZH, hi8(RC_LFSR7) +#endif + +round_loop_start: + ; AddRC + lpm t0j, Z+ + ldi YH, hi8(SRAM_STATE) + ldi YL, lo8(SRAM_STATE) + ld x0j, Y + eor x0j, t0j + + ; SubColumns + Sbox x0j, x10, x20, x30 + st Y+, x0j + ld x0j, Y + Sbox x0j, x11, x21, x31 + st Y+, x0j + ld x0j, Y + Sbox x0j, x12, x22, x32 + st Y+, x0j + ld x0j, Y + Sbox x0j, x13, x23, x33 + st Y+, x0j + ld x0j, Y + Sbox x0j, x14, x24, x34 + st Y+, x0j + ld x0j, Y + Sbox x0j, x15, x25, x35 + st Y+, x0j + ld x0j, Y + Sbox x0j, x16, x26, x36 + st Y+, x0j + ld x0j, Y + Sbox x0j, x17, x27, x37 + st Y, x0j + + ; ShiftRows + ; <<< 1 + mov t0j, x17 + rol t0j + rol x10 + rol x11 + rol x12 + rol x13 + rol x14 + rol x15 + rol x16 + rol x17 + + ; <<< 8 + ; 7 6 5 4 3 2 1 0 => 6 5 4 3 2 1 0 7 + ;mov t0j, x27 + ;mov x27, x26 + ;mov x26, x25 + ;mov x25, x24 + ;mov x24, x23 + ;mov x23, x22 + ;mov x22, x21 + ;mov x21, x20 + ;mov x20, t0j + ; an intentionally arrangement of registers to facilitate movw + movw t0j, x23 ; t1j:t0j <= x27:x23 + movw x23, x22 ; x27:x23 <= x26:x22 + movw x22, x21 ; x26:x22 <= x25:x21 + movw x21, x20 ; x25:x21 <= x24:x20 + 
mov x20, t1j ; x20 <= t1j + mov x24, t0j ; x24 <= t0j + + ; <<< 1 + mov t0j, x37 + rol t0j + rol x30 + rol x31 + rol x32 + rol x33 + rol x34 + rol x35 + rol x36 + rol x37 + ; <<< 24 + ; 7 6 5 4 3 2 1 0 => 4 3 2 1 0 7 6 5 + ;mov t0j, x30 + ;mov x30, x35 + ;mov x35, x32 + ;mov x32, x37 + ;mov x37, x34 + ;mov x34, x31 + ;mov x31, x36 + ;mov x36, x33 + ;mov x33, t0j + ; an intentionally arrangement of registers to facilitate movw + ;x30 r16 + ;x35 r18 + ;x32 r20 + ;x37 r22 + ;x34 r17 + ;x31 r19 + ;x36 r21 + ;x33 r23 + movw t0j, x30 ; t1j:t0j <= x34:x30 + movw x30, x35 ; x34:x30 <= x31:x35 + movw x35, x32 ; x31:x35 <= x36:x32 + movw x32, x37 ; x36:x32 <= x33:x37 + mov x37, t1j ; x37 <= x34 + mov x33, t0j ; x33 <= x30 + + dec rcnt + breq round_loop_end + jmp round_loop_start + +round_loop_end: + ldi YH, hi8(SRAM_STATE + ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + ROW_INBYTES) + st Y+, x10 + st Y+, x11 + st Y+, x12 + st Y+, x13 + st Y+, x14 + st Y+, x15 + st Y+, x16 + st Y+, x17 + st Y+, x20 + st Y+, x21 + st Y+, x22 + st Y+, x23 + st Y+, x24 + st Y+, x25 + st Y+, x26 + st Y+, x27 + st Y+, x30 + st Y+, x31 + st Y+, x32 + st Y+, x33 + st Y+, x34 + st Y+, x35 + st Y+, x36 + st Y+, x37 + + POP_CONFLICT +ret + + +.section .text +#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) +RC_LFSR6: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x21, 0x03, 0x06 +.byte 0x0c, 0x18, 0x31, 0x22, 0x05, 0x0a, 0x14, 0x29 +.byte 0x13, 0x27, 0x0f, 0x1e, 0x3d, 0x3a, 0x34, 0x28 +.byte 0x11, 0x23, 0x07, 0x0e, 0x1c, 0x39, 0x32, 0x24 +.byte 0x09, 0x12, 0x25, 0x0b, 0x16, 0x2d, 0x1b, 0x37 +.byte 0x2e, 0x1d, 0x3b, 0x36, 0x2c, 0x19, 0x33, 0x26 +.byte 0x0d, 0x1a, 0x35, 0x2a, 0x15, 0x2b, 0x17, 0x2f +.byte 0x1f, 0x3f, 0x3e, 0x3c, 0x38, 0x30, 0x20, 0x00 +RC_LFSR7: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03 +.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a +.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c +.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b +.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 
0x1d, 0x3a +.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f +.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43 +.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09 +.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36 +.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37 +.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31 +.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25 +.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c +.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a +.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f +.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00 +#elif defined(CRYPTO_AEAD) +RC_LFSR6: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x21, 0x03, 0x06 +.byte 0x0c, 0x18, 0x31, 0x22, 0x05, 0x0a, 0x14, 0x29 +.byte 0x13, 0x27, 0x0f, 0x1e, 0x3d, 0x3a, 0x34, 0x28 +.byte 0x11, 0x23, 0x07, 0x0e, 0x1c, 0x39, 0x32, 0x24 +.byte 0x09, 0x12, 0x25, 0x0b, 0x16, 0x2d, 0x1b, 0x37 +.byte 0x2e, 0x1d, 0x3b, 0x36, 0x2c, 0x19, 0x33, 0x26 +.byte 0x0d, 0x1a, 0x35, 0x2a, 0x15, 0x2b, 0x17, 0x2f +.byte 0x1f, 0x3f, 0x3e, 0x3c, 0x38, 0x30, 0x20, 0x00 +#else +RC_LFSR7: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03 +.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a +.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c +.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b +.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a +.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f +.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43 +.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09 +.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36 +.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37 +.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31 +.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25 +.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c +.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a +.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f +.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00 +#endif \ No newline at end of file diff 
--git a/knot/Implementations/crypto_aead/knot256/avr8_speed/knot384.h b/knot/Implementations/crypto_aead/knot256/avr8_speed/knot384.h new file mode 100644 index 0000000..0b3dd75 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot256/avr8_speed/knot384.h @@ -0,0 +1,261 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.1 2020 by KNOT Team * +; ********************************************** +; + +; an intentionally arrangement of registers to facilitate movw +#define x20 r0 +#define x21 r2 +#define x22 r4 +#define x23 r6 +#define x24 r8 +#define x25 r10 +#define x26 r1 +#define x27 r3 +#define x28 r5 +#define x29 r7 +#define x2a r9 +#define x2b r11 + +; an intentionally arrangement of registers to facilitate movw +#define x30 r22 +#define x35 r20 +#define x3a r18 +#define x33 r16 +#define x38 r14 +#define x31 r12 +#define x36 r23 +#define x3b r21 +#define x34 r19 +#define x39 r17 +#define x32 r15 +#define x37 r13 + +#define t0j r24 +#define t1j r25 +#define x0j r25 +#define x1j r27 + +#include "assist.h" + +.macro Sbox i0, i1, i2, i3 + ldi t0j, 0xFF + eor \i0, t0j + mov t0j, \i1 + and \i1, \i0 + eor \i1, \i2 + or \i2, t0j + eor \i0, \i3 + eor \i2, \i0 + eor t0j, \i3 + and \i0, \i1 + eor \i3, \i1 + eor \i0, t0j + and t0j, \i2 + eor \i1, t0j +.endm + +.macro OneColumn i0, i1, i2, i3 + ld \i0, Y + ldd \i1, Y + ROW_INBYTES + Sbox \i0, \i1, \i2, \i3 + st Y+, \i0 + rol \i1 ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES -1, \i1 +.endm + +Permutation: + PUSH_CONFLICT + mov rcnt, rn + + ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES) + ld x20, Y+ + ld x21, Y+ + ld x22, Y+ + ld x23, Y+ + ld x24, Y+ + ld x25, Y+ + ld x26, Y+ + ld x27, Y+ + ld x28, Y+ + ld x29, Y+ + ld x2a, Y+ + ld x2b, Y+ + ld x30, Y+ + ld x31, Y+ + ld x32, Y+ + ld x33, Y+ + ld 
x34, Y+ + ld x35, Y+ + ld x36, Y+ + ld x37, Y+ + ld x38, Y+ + ld x39, Y+ + ld x3a, Y+ + ld x3b, Y+ + + ldi ZL, lo8(RC_LFSR7) + ldi ZH, hi8(RC_LFSR7) + +round_loop_start: + ; AddRC + lpm t0j, Z+ + ldi YH, hi8(SRAM_STATE) + ldi YL, lo8(SRAM_STATE) + ld x0j, Y + eor x0j, t0j + + ldd x1j, Y + ROW_INBYTES + Sbox x0j, x1j, x20, x30 + st Y+, x0j + lsl x1j ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES -1, x1j + + OneColumn x0j, x1j, x21, x31 + OneColumn x0j, x1j, x22, x32 + OneColumn x0j, x1j, x23, x33 + OneColumn x0j, x1j, x24, x34 + OneColumn x0j, x1j, x25, x35 + OneColumn x0j, x1j, x26, x36 + OneColumn x0j, x1j, x27, x37 + OneColumn x0j, x1j, x28, x38 + OneColumn x0j, x1j, x29, x39 + OneColumn x0j, x1j, x2a, x3a + OneColumn x0j, x1j, x2b, x3b + + ld x1j, Y + eor t0j, t0j + adc x1j, t0j + st Y, x1j + + ; b a 9 8 7 6 5 4 3 2 1 0 + ; -- -- -- -- -- -- -- -- -- -- -- x- 0 + ; -- -- -- -- -- -- -- -- -- -- -- x' 0 + ; -- -- -- -- -- -- -- -- -- -- x- -- 1 + ; -- -- -- -- x' -- -- -- -- -- -- -- 7 + ; 4 3 2 1 0 b a 9 8 7 6 5 + + ; ShiftRows -- the last two rows + ; <<< 8 + ; b a 9 8 7 6 5 4 3 2 1 0 => a 9 8 7 6 5 4 3 2 1 0 b + movw t0j, x25 ; t1j:t0j <= x2b:x25 + movw x25, x24 ; x2b:x25 <= x2a:x24 + movw x24, x23 ; x2a:x24 <= x29:x23 + movw x23, x22 ; x29:x23 <= x28:x22 + movw x22, x21 ; x28:x22 <= x27:x21 + movw x21, x20 ; x27:x21 <= x26:x20 + mov x26, t0j ; x26 <= x25 + mov x20, t1j ; x20 <= x2b + + ; >>> 1 + mov t0j, x3b + ror t0j + ror x3a + ror x39 + ror x38 + ror x37 + ror x36 + ror x35 + ror x34 + ror x33 + ror x32 + ror x31 + ror x30 + ror x3b + ; <<< 56 + ; b a 9 8 7 6 5 4 3 2 1 0 => 4 3 2 1 0 b a 9 8 7 6 5 + ; mov x3j, x30 + ; mov x30, x35 + ; mov x35, x3a + ; mov x3a, x33 + ; mov x33, x38 + ; mov x38, x31 + ; mov x31, x36 + ; mov x36, x3b + ; mov x3b, x34 + ; mov x34, x39 + ; mov x39, x32 + ; mov x32, x37 + ; mov x37, x3j + ; an intentionally arrangement of registers to facilitate movw + ; x30 r22 + ; x35 r20 + ; x3a r18 + ; x33 r16 + ; x38 r14 + ; x31 r12 + ; 
x36 r23 + ; x3b r21 + ; x34 r19 + ; x39 r17 + ; x32 r15 + ; x37 r13 + movw t0j, x30 ; t1j:t0j <= x36:x30 + movw x30, x35 ; x36:x30 <= x3b:x35 + movw x35, x3a ; x3b:x35 <= x34:x3a + movw x3a, x33 ; x34:x3a <= x39:x33 + movw x33, x38 ; x39:x33 <= x32:x38 + movw x38, x31 ; x32:x38 <= x37:x31 + mov x31, t1j ; x31 <= x36 + mov x37, t0j ; x37 <= x30 + + dec rcnt + breq round_loop_end + jmp round_loop_start + +round_loop_end: + + ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES) + st Y+, x20 + st Y+, x21 + st Y+, x22 + st Y+, x23 + st Y+, x24 + st Y+, x25 + st Y+, x26 + st Y+, x27 + st Y+, x28 + st Y+, x29 + st Y+, x2a + st Y+, x2b + st Y+, x30 + st Y+, x31 + st Y+, x32 + st Y+, x33 + st Y+, x34 + st Y+, x35 + st Y+, x36 + st Y+, x37 + st Y+, x38 + st Y+, x39 + st Y+, x3a + st Y+, x3b + + POP_CONFLICT +ret + +RC_LFSR7: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03 +.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a +.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c +.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b +.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a +.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f +.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43 +.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09 +.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36 +.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37 +.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31 +.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25 +.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c +.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a +.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f +.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00 \ No newline at end of file diff --git a/knot/Implementations/crypto_aead/knot256/avr8_speed/knot512.h b/knot/Implementations/crypto_aead/knot256/avr8_speed/knot512.h new file mode 100644 index 0000000..b0e4319 --- /dev/null +++ 
b/knot/Implementations/crypto_aead/knot256/avr8_speed/knot512.h @@ -0,0 +1,435 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.1 2020 by KNOT Team * +; ********************************************** +; +#define x20 r0 +#define x22 r2 +#define x24 r4 +#define x26 r6 +#define x28 r1 +#define x2a r3 +#define x2c r5 +#define x2e r7 + +#define x30 r8 +#define x3d r10 +#define x3a r12 +#define x37 r14 +#define x34 r16 +#define x31 r18 +#define x3e r20 +#define x3b r22 +#define x38 r9 +#define x35 r11 +#define x32 r13 +#define x3f r15 +#define x3c r17 +#define x39 r19 +#define x36 r21 +#define x33 r23 + +#define t0j r24 +#define t1j r25 +#define x0j r25 +#define x1j r27 +#define x2j r26 + +#include "assist.h" + +.macro Sbox i0, i1, i2, i3 + ldi t0j, 0xFF + eor \i0, t0j + mov t0j, \i1 + and \i1, \i0 + eor \i1, \i2 + or \i2, t0j + eor \i0, \i3 + eor \i2, \i0 + eor t0j, \i3 + and \i0, \i1 + eor \i3, \i1 + eor \i0, t0j + and t0j, \i2 + eor \i1, t0j +.endm + +.macro TwoColumns i2_e, i3_e, i3_o + ; column 2i + ld x0j, Y + ldd x1j, Y + ROW_INBYTES + Sbox x0j, x1j, \i2_e, \i3_e + st Y+, x0j + rol x1j ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES - 1, x1j + + ; column 2i+1 + ld x0j, Y + ldd x1j, Y + ROW_INBYTES + Sbox x0j, x1j, x2j, \i3_o + st Y+, x0j + rol x1j ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES - 1, x1j + ldd t0j, Y + 2 * ROW_INBYTES + 1 + std Y + 2 * ROW_INBYTES + 1, x2j + mov x2j, t0j +.endm + +Permutation: + PUSH_CONFLICT + mov rcnt, rn + push rcnt + + ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES) + + ldd x20, Y + 0x00 + ldd x22, Y + 0x02 + ldd x24, Y + 0x04 + ldd x26, Y + 0x06 + ldd x28, Y + 0x08 + ldd x2a, Y + 0x0a + ldd x2c, Y + 0x0c + ldd x2e, Y + 0x0e + + adiw YL, ROW_INBYTES + + ld x30, Y+ + ld x31, Y+ + ld x32, Y+ + ld 
x33, Y+ + ld x34, Y+ + ld x35, Y+ + ld x36, Y+ + ld x37, Y+ + ld x38, Y+ + ld x39, Y+ + ld x3a, Y+ + ld x3b, Y+ + ld x3c, Y+ + ld x3d, Y+ + ld x3e, Y+ + ld x3f, Y+ + +#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) + sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[2] = 1 for HASH + rjmp For_Hash +For_AEAD: + ldi ZL, lo8(RC_LFSR7) + ldi ZH, hi8(RC_LFSR7) + rjmp round_loop_start +For_Hash: + ldi ZL, lo8(RC_LFSR8) + ldi ZH, hi8(RC_LFSR8) +#elif defined(CRYPTO_AEAD) + ldi ZL, lo8(RC_LFSR7) + ldi ZH, hi8(RC_LFSR7) +#else + ldi ZL, lo8(RC_LFSR8) + ldi ZH, hi8(RC_LFSR8) +#endif + + +round_loop_start: + ; AddRC + lpm t0j, Z+ + ldi YH, hi8(SRAM_STATE) + ldi YL, lo8(SRAM_STATE) + + ; column 0 + ld x0j, Y + eor x0j, t0j + ldd x1j, Y + ROW_INBYTES + Sbox x0j, x1j, x20, x30 + st Y+, x0j + lsl x1j ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES - 1, x1j + + ; column 1 + ld x0j, Y + ldd x1j, Y + ROW_INBYTES + ldd x2j, Y + 2 * ROW_INBYTES + Sbox x0j, x1j, x2j, x31 + st Y+, x0j + rol x1j ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES - 1, x1j + ldd t0j, Y + 2 * ROW_INBYTES + 1 + std Y + 2 * ROW_INBYTES + 1, x2j + mov x2j, t0j + + ; column 2, 3 + TwoColumns x22, x32, x33 + ; column 4, 5 + TwoColumns x24, x34, x35 + ; column 6, 7 + TwoColumns x26, x36, x37 + ; column 8, 9 + TwoColumns x28, x38, x39 + ; column 10, 11 + TwoColumns x2a, x3a, x3b + ; column 12, 13 + TwoColumns x2c, x3c, x3d + + ; column 14 + ld x0j, Y + ldd x1j, Y + ROW_INBYTES + Sbox x0j, x1j, x2e, x3e + st Y+, x0j + rol x1j ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES - 1, x1j + + ; column 15 + ld x0j, Y + ldd x1j, Y + ROW_INBYTES + Sbox x0j, x1j, x2j, x3f + st Y+, x0j + rol x1j ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES - 1, x1j + + ld x1j, Y + eor t0j, t0j + adc x1j, t0j + st Y, x1j + std Y + ROW_INBYTES + 1, x2j + + ; f e d c b a 9 8 7 6 5 4 3 2 1 0 + ; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x- 0 + ; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x' 0 + ; -- -- -- -- -- -- -- -- -- -- -- -- -- x- -- 
-- 2 + ; -- -- -- -- -- -- -- -- -- -- -- -- x' -- -- -- 3 + ; c b a 9 8 7 6 5 4 3 2 1 0 f e d + ; x2e x2c x2a x28 x26 x24 x22 x20 => x2c x2a x28 x26 x24 x22 x20 x2e + ;mov t0j, x2e + ;mov x2e, x2c + ;mov x2c, x2a + ;mov x2a, x28 + ;mov x28, x26 + ;mov x26, x24 + ;mov x24, x22 + ;mov x22, x20 + ;mov x20, t0j + ; an intentionally arrangement of registers to facilitate movw + movw t0j, x26 ; t1j:t0j <= x2e:x26 + movw x26, x24 ; x2e:x26 <= x2c:x24 + movw x24, x22 ; x2c:x24 <= x2a:x22 + movw x22, x20 ; x2a:x22 <= x28:x20 + mov x20, t1j ; x20 <= t1j + mov x28, t0j ; x28 <= t0j + + ; <<< 1 + mov t0j, x3f + rol t0j + rol x30 + rol x31 + rol x32 + rol x33 + rol x34 + rol x35 + rol x36 + rol x37 + rol x38 + rol x39 + rol x3a + rol x3b + rol x3c + rol x3d + rol x3e + rol x3f + ; <<< 24 + ; f e d c b a 9 8 7 6 5 4 3 2 1 0 => + ; c b a 9 8 7 6 5 4 3 2 1 0 f e d + ; mov x3j, x30 + ; mov x30, x3d + ; mov x3d, x3a + ; mov x3a, x37 + ; mov x37, x34 + ; mov x34, x31 + ; mov x31, x3e + ; mov x3e, x3b + ; mov x3b, x38 + ; mov x38, x35 + ; mov x35, x32 + ; mov x32, x3f + ; mov x3f, x3c + ; mov x3c, x39 + ; mov x39, x36 + ; mov x36, x33 + ; mov x33, x3j + ; an intentionally arrangement of registers to facilitate movw + ; x30 r8 + ; x3d r10 + ; x3a r12 + ; x37 r14 + ; x34 r16 + ; x31 r18 + ; x3e r20 + ; x3b r22 + ; x38 r9 + ; x35 r11 + ; x32 r13 + ; x3f r15 + ; x3c r17 + ; x39 r19 + ; x36 r21 + ; x33 r23 + movw t0j, x30 ; t1j:t0j <= x38:x30 + movw x30, x3d ; x38:x30 <= x35:x3d + movw x3d, x3a ; x35:x3d <= x32:x3a + movw x3a, x37 ; x32:x3a <= x3f:x37 + movw x37, x34 ; x3f:x37 <= x3c:x34 + movw x34, x31 ; x3c:x34 <= x39:x31 + movw x31, x3e ; x39:x31 <= x36:x3e + movw x3e, x3b ; x36:x3e <= x33:x3b + mov x3b, t1j ; x3b <= x38 + mov x33, t0j ; x33 <= x30 + + pop rcnt + dec rcnt + push rcnt + breq round_loop_end + rjmp round_loop_start + +round_loop_end: + pop rcnt + + ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES) + std Y + 0x00, x20 + std Y + 0x02, x22 
+ std Y + 0x04, x24 + std Y + 0x06, x26 + std Y + 0x08, x28 + std Y + 0x0a, x2a + std Y + 0x0c, x2c + std Y + 0x0e, x2e + adiw YL, ROW_INBYTES + st Y+, x30 + st Y+, x31 + st Y+, x32 + st Y+, x33 + st Y+, x34 + st Y+, x35 + st Y+, x36 + st Y+, x37 + st Y+, x38 + st Y+, x39 + st Y+, x3a + st Y+, x3b + st Y+, x3c + st Y+, x3d + st Y+, x3e + st Y+, x3f + + POP_CONFLICT +ret + +.section .text +#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) +RC_LFSR7: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03 +.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a +.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c +.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b +.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a +.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f +.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43 +.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09 +.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36 +.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37 +.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31 +.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25 +.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c +.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a +.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f +.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00 +RC_LFSR8: +.byte 0x01, 0x02, 0x04, 0x08, 0x11, 0x23, 0x47, 0x8e +.byte 0x1c, 0x38, 0x71, 0xe2, 0xc4, 0x89, 0x12, 0x25 +.byte 0x4b, 0x97, 0x2e, 0x5c, 0xb8, 0x70, 0xe0, 0xc0 +.byte 0x81, 0x03, 0x06, 0x0c, 0x19, 0x32, 0x64, 0xc9 +.byte 0x92, 0x24, 0x49, 0x93, 0x26, 0x4d, 0x9b, 0x37 +.byte 0x6e, 0xdc, 0xb9, 0x72, 0xe4, 0xc8, 0x90, 0x20 +.byte 0x41, 0x82, 0x05, 0x0a, 0x15, 0x2b, 0x56, 0xad +.byte 0x5b, 0xb6, 0x6d, 0xda, 0xb5, 0x6b, 0xd6, 0xac +.byte 0x59, 0xb2, 0x65, 0xcb, 0x96, 0x2c, 0x58, 0xb0 +.byte 0x61, 0xc3, 0x87, 0x0f, 0x1f, 0x3e, 0x7d, 0xfb +.byte 0xf6, 0xed, 0xdb, 0xb7, 0x6f, 0xde, 0xbd, 0x7a +.byte 0xf5, 0xeb, 0xd7, 0xae, 0x5d, 0xba, 0x74, 0xe8 +.byte 0xd1, 0xa2, 0x44, 
0x88, 0x10, 0x21, 0x43, 0x86 +.byte 0x0d, 0x1b, 0x36, 0x6c, 0xd8, 0xb1, 0x63, 0xc7 +.byte 0x8f, 0x1e, 0x3c, 0x79, 0xf3, 0xe7, 0xce, 0x9c +.byte 0x39, 0x73, 0xe6, 0xcc, 0x98, 0x31, 0x62, 0xc5 +.byte 0x8b, 0x16, 0x2d, 0x5a, 0xb4, 0x69, 0xd2, 0xa4 +.byte 0x48, 0x91, 0x22, 0x45, 0x8a, 0x14, 0x29, 0x52 +.byte 0xa5, 0x4a, 0x95, 0x2a, 0x54, 0xa9, 0x53, 0xa7 +.byte 0x4e, 0x9d, 0x3b, 0x77, 0xee, 0xdd, 0xbb, 0x76 +.byte 0xec, 0xd9, 0xb3, 0x67, 0xcf, 0x9e, 0x3d, 0x7b +.byte 0xf7, 0xef, 0xdf, 0xbf, 0x7e, 0xfd, 0xfa, 0xf4 +.byte 0xe9, 0xd3, 0xa6, 0x4c, 0x99, 0x33, 0x66, 0xcd +.byte 0x9a, 0x35, 0x6a, 0xd4, 0xa8, 0x51, 0xa3, 0x46 +.byte 0x8c, 0x18, 0x30, 0x60, 0xc1, 0x83, 0x07, 0x0e +.byte 0x1d, 0x3a, 0x75, 0xea, 0xd5, 0xaa, 0x55, 0xab +.byte 0x57, 0xaf, 0x5f, 0xbe, 0x7c, 0xf9, 0xf2, 0xe5 +.byte 0xca, 0x94, 0x28, 0x50, 0xa1, 0x42, 0x84, 0x09 +.byte 0x13, 0x27, 0x4f, 0x9f, 0x3f, 0x7f, 0xff, 0xfe +.byte 0xfc, 0xf8, 0xf0, 0xe1, 0xc2, 0x85, 0x0b, 0x17 +.byte 0x2f, 0x5e, 0xbc, 0x78, 0xf1, 0xe3, 0xc6, 0x8d +.byte 0x1a, 0x34, 0x68, 0xd0, 0xa0, 0x40, 0x80, 0x00 +#elif defined(CRYPTO_AEAD) +RC_LFSR7: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03 +.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a +.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c +.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b +.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a +.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f +.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43 +.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09 +.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36 +.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37 +.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31 +.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25 +.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c +.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a +.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f +.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00 +#else +RC_LFSR8: +.byte 0x01, 0x02, 
0x04, 0x08, 0x11, 0x23, 0x47, 0x8e +.byte 0x1c, 0x38, 0x71, 0xe2, 0xc4, 0x89, 0x12, 0x25 +.byte 0x4b, 0x97, 0x2e, 0x5c, 0xb8, 0x70, 0xe0, 0xc0 +.byte 0x81, 0x03, 0x06, 0x0c, 0x19, 0x32, 0x64, 0xc9 +.byte 0x92, 0x24, 0x49, 0x93, 0x26, 0x4d, 0x9b, 0x37 +.byte 0x6e, 0xdc, 0xb9, 0x72, 0xe4, 0xc8, 0x90, 0x20 +.byte 0x41, 0x82, 0x05, 0x0a, 0x15, 0x2b, 0x56, 0xad +.byte 0x5b, 0xb6, 0x6d, 0xda, 0xb5, 0x6b, 0xd6, 0xac +.byte 0x59, 0xb2, 0x65, 0xcb, 0x96, 0x2c, 0x58, 0xb0 +.byte 0x61, 0xc3, 0x87, 0x0f, 0x1f, 0x3e, 0x7d, 0xfb +.byte 0xf6, 0xed, 0xdb, 0xb7, 0x6f, 0xde, 0xbd, 0x7a +.byte 0xf5, 0xeb, 0xd7, 0xae, 0x5d, 0xba, 0x74, 0xe8 +.byte 0xd1, 0xa2, 0x44, 0x88, 0x10, 0x21, 0x43, 0x86 +.byte 0x0d, 0x1b, 0x36, 0x6c, 0xd8, 0xb1, 0x63, 0xc7 +.byte 0x8f, 0x1e, 0x3c, 0x79, 0xf3, 0xe7, 0xce, 0x9c +.byte 0x39, 0x73, 0xe6, 0xcc, 0x98, 0x31, 0x62, 0xc5 +.byte 0x8b, 0x16, 0x2d, 0x5a, 0xb4, 0x69, 0xd2, 0xa4 +.byte 0x48, 0x91, 0x22, 0x45, 0x8a, 0x14, 0x29, 0x52 +.byte 0xa5, 0x4a, 0x95, 0x2a, 0x54, 0xa9, 0x53, 0xa7 +.byte 0x4e, 0x9d, 0x3b, 0x77, 0xee, 0xdd, 0xbb, 0x76 +.byte 0xec, 0xd9, 0xb3, 0x67, 0xcf, 0x9e, 0x3d, 0x7b +.byte 0xf7, 0xef, 0xdf, 0xbf, 0x7e, 0xfd, 0xfa, 0xf4 +.byte 0xe9, 0xd3, 0xa6, 0x4c, 0x99, 0x33, 0x66, 0xcd +.byte 0x9a, 0x35, 0x6a, 0xd4, 0xa8, 0x51, 0xa3, 0x46 +.byte 0x8c, 0x18, 0x30, 0x60, 0xc1, 0x83, 0x07, 0x0e +.byte 0x1d, 0x3a, 0x75, 0xea, 0xd5, 0xaa, 0x55, 0xab +.byte 0x57, 0xaf, 0x5f, 0xbe, 0x7c, 0xf9, 0xf2, 0xe5 +.byte 0xca, 0x94, 0x28, 0x50, 0xa1, 0x42, 0x84, 0x09 +.byte 0x13, 0x27, 0x4f, 0x9f, 0x3f, 0x7f, 0xff, 0xfe +.byte 0xfc, 0xf8, 0xf0, 0xe1, 0xc2, 0x85, 0x0b, 0x17 +.byte 0x2f, 0x5e, 0xbc, 0x78, 0xf1, 0xe3, 0xc6, 0x8d +.byte 0x1a, 0x34, 0x68, 0xd0, 0xa0, 0x40, 0x80, 0x00 +#endif \ No newline at end of file diff --git a/knot/Implementations/crypto_aead/knot256/avr8_speed/permutation.h b/knot/Implementations/crypto_aead/knot256/avr8_speed/permutation.h new file mode 100644 index 0000000..e6c9793 --- /dev/null +++ 
b/knot/Implementations/crypto_aead/knot256/avr8_speed/permutation.h @@ -0,0 +1,45 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.1 2020 by KNOT Team * +; ********************************************** +; + +; +; ============================================ +; R E G I S T E R D E F I N I T I O N S +; ============================================ +; + +#define mclen r16 +#define radlen r17 +#define tcnt r17 +#define tmp0 r20 +#define tmp1 r21 +#define cnt0 r22 +#define rn r23 +#define rate r24 + + +; AEDH = 0b000: for authenticate AD +; AEDH = 0b001: for encryption +; AEDH = 0b011: for decryption +; AEDH = 0b100: for hash +#define AEDH r25 +#define rcnt r26 + +#if (STATE_INBITS==256) +#include "knot256.h" +#elif (STATE_INBITS==384) +#include "knot384.h" +#elif (STATE_INBITS==512) +#include "knot512.h" +#else +#error "Not specified key size and state size" +#endif + + diff --git a/knot/Implementations/crypto_hash/knot256v1/avr8_speed/api.h b/knot/Implementations/crypto_hash/knot256v1/avr8_speed/api.h new file mode 100644 index 0000000..cb530c7 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v1/avr8_speed/api.h @@ -0,0 +1 @@ +#define CRYPTO_BYTES 32 \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot256v1/avr8_speed/assist.h b/knot/Implementations/crypto_hash/knot256v1/avr8_speed/assist.h new file mode 100644 index 0000000..f95a717 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v1/avr8_speed/assist.h @@ -0,0 +1,86 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.1 2020 by KNOT Team * +; ********************************************** +; 
+.macro PUSH_CONFLICT + push r16 + push r17 + push r18 + push r19 + + push r23 + push r24 + push r25 + push r26 + push r27 + push r28 + push r29 + push r30 + push r31 +.endm + +.macro POP_CONFLICT + pop r31 + pop r30 + pop r29 + pop r28 + pop r27 + pop r26 + pop r25 + pop r24 + pop r23 + + pop r19 + pop r18 + pop r17 + pop r16 +.endm + +.macro PUSH_ALL + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r28 + push r29 +.endm + +.macro POP_ALL + pop r29 + pop r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + clr r1 +.endm \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot256v1/avr8_speed/config.h b/knot/Implementations/crypto_hash/knot256v1/avr8_speed/config.h new file mode 100644 index 0000000..467fedb --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v1/avr8_speed/config.h @@ -0,0 +1,131 @@ +#ifndef __CONFIG_H__ +#define __CONFIG_H__ + +//#define CRYPTO_AEAD +#define CRYPTO_HASH + +#define MAX_MESSAGE_LENGTH 128 + +#define STATE_INBITS 256 +/* For CRYPTO_AEAD */ +#define CRYPTO_KEYBITS 128 +/* For CRYPTO_HASH */ +#define CRYPTO_BITS 256 + +#define STATE_INBYTES ((STATE_INBITS + 7) / 8) +#define ROW_INBITS ((STATE_INBITS + 3) / 4) +#define ROW_INBYTES ((ROW_INBITS + 7) / 8) + +/* For CRYPTO_AEAD */ +#define CRYPTO_KEYBYTES ((CRYPTO_KEYBITS + 7) / 8) +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES CRYPTO_KEYBYTES +#define CRYPTO_ABYTES CRYPTO_KEYBYTES +#define CRYPTO_NOOVERLAP 1 + +#define MAX_ASSOCIATED_DATA_LENGTH 32 +#define MAX_CIPHER_LENGTH (MAX_MESSAGE_LENGTH + CRYPTO_ABYTES) + +#define TAG_MATCH 0 +#define TAG_UNMATCH -1 +#define OTHER_FAILURES -2 + +/* For CRYPTO_HASH */ +#define CRYPTO_BYTES ((CRYPTO_BITS + 7) / 8) + + + +#define DOMAIN_BITS 0x80 +#define PAD_BITS 0x01 +#define 
S384_R192_BITS 0x80 + +#if (STATE_INBITS==256) +#define C1 1 +#define C2 8 +#define C3 25 +#elif (STATE_INBITS==384) +#define C1 1 +#define C2 8 +#define C3 55 +#elif (STATE_INBITS==512) +#define C1 1 +#define C2 16 +#define C3 25 +#else +#error "Not specified state size" +#endif + +#ifdef CRYPTO_AEAD +/* For CRYPTO_AEAD */ +#define KEY_INBITS (CRYPTO_KEYBYTES * 8) +#define KEY_INBYTES (CRYPTO_KEYBYTES) + +#define NONCE_INBITS (CRYPTO_NPUBBYTES * 8) +#define NONCE_INBYTES (CRYPTO_NPUBBYTES) + +#define TAG_INBITS (CRYPTO_ABYTES * 8) +#define TAG_INBYTES (CRYPTO_ABYTES) + +#if (KEY_INBITS==128) && (STATE_INBITS==256) +#define RATE_INBITS 64 +#define NR_0 52 +#define NR_i 28 +#define NR_f 32 +#elif (KEY_INBITS==128) && (STATE_INBITS==384) +#define RATE_INBITS 192 +#define NR_0 76 +#define NR_i 28 +#define NR_f 32 +#elif (KEY_INBITS==192) && (STATE_INBITS==384) +#define RATE_INBITS 96 +#define NR_0 76 +#define NR_i 40 +#define NR_f 44 +#elif (KEY_INBITS==256) && (STATE_INBITS==512) +#define RATE_INBITS 128 +#define NR_0 100 +#define NR_i 52 +#define NR_f 56 +#else +#error "Not specified key size and state size" +#endif + +#define RATE_INBYTES ((RATE_INBITS + 7) / 8) +#define SQUEEZE_RATE_INBYTES TAG_INBYTES + +#endif + +#ifdef CRYPTO_HASH +/* For CRYPTO_HASH */ +#define HASH_DIGEST_INBITS (CRYPTO_BYTES * 8) + +#if (HASH_DIGEST_INBITS==256) && (STATE_INBITS==256) +#define HASH_RATE_INBITS 32 +#define HASH_SQUEEZE_RATE_INBITS 128 +#define NR_h 68 +#elif (HASH_DIGEST_INBITS==256) && (STATE_INBITS==384) +#define HASH_RATE_INBITS 128 +#define HASH_SQUEEZE_RATE_INBITS 128 +#define NR_h 80 +#elif (HASH_DIGEST_INBITS==384) && (STATE_INBITS==384) +#define HASH_RATE_INBITS 48 +#define HASH_SQUEEZE_RATE_INBITS 192 +#define NR_h 104 +#elif (HASH_DIGEST_INBITS==512) && (STATE_INBITS==512) +#define HASH_RATE_INBITS 64 +#define HASH_SQUEEZE_RATE_INBITS 256 +#define NR_h 140 +#else +#error "Not specified hash digest size and state size" +#endif + +#define HASH_RATE_INBYTES 
((HASH_RATE_INBITS + 7) / 8) +#define HASH_SQUEEZE_RATE_INBYTES ((HASH_SQUEEZE_RATE_INBITS + 7) / 8) + +#endif + +#define TAG_MATCH 0 +#define TAG_UNMATCH -1 +#define OTHER_FAILURES -2 + +#endif \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot256v1/avr8_speed/crypto_hash.h b/knot/Implementations/crypto_hash/knot256v1/avr8_speed/crypto_hash.h new file mode 100644 index 0000000..342a639 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v1/avr8_speed/crypto_hash.h @@ -0,0 +1,13 @@ +#ifdef __cplusplus +extern "C" { +#endif + +int crypto_hash( + unsigned char *out, + const unsigned char *in, + unsigned long long inlen + ); + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot256v1/avr8_speed/encrypt.c b/knot/Implementations/crypto_hash/knot256v1/avr8_speed/encrypt.c new file mode 100644 index 0000000..baf0a3b --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v1/avr8_speed/encrypt.c @@ -0,0 +1,106 @@ +#include +#include +#include +#include +#include "config.h" + +extern void crypto_aead_encrypt_asm( + unsigned char *c, + const unsigned char *m, + unsigned char mlen, + const unsigned char *ad, + unsigned char adlen, + const unsigned char *npub, + const unsigned char *k + ); + +extern int crypto_aead_decrypt_asm( + unsigned char *m, + const unsigned char *c, + unsigned char clen, + const unsigned char *ad, + unsigned char adlen, + const unsigned char *npub, + const unsigned char *k + ); + +extern void crypto_hash_asm( + unsigned char *out, + const unsigned char *in, + unsigned char inlen + ); + + +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k + ) +{ + /* + ... + ... the code for the cipher implementation goes here, + ... 
generating a ciphertext c[0],c[1],...,c[*clen-1] + ... from a plaintext m[0],m[1],...,m[mlen-1] + ... and associated data ad[0],ad[1],...,ad[adlen-1] + ... and nonce npub[0],npub[1],.. + ... and secret key k[0],k[1],... + ... the implementation shall not use nsec + ... + ... return 0; + */ + + (void)nsec; + + crypto_aead_encrypt_asm(c, m, mlen, ad, adlen, npub, k); + + *clen = mlen + TAG_INBYTES; + return 0; +} + + + +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k + ) +{ + /* + ... + ... the code for the AEAD implementation goes here, + ... generating a plaintext m[0],m[1],...,m[*mlen-1] + ... and secret message number nsec[0],nsec[1],... + ... from a ciphertext c[0],c[1],...,c[clen-1] + ... and associated data ad[0],ad[1],...,ad[adlen-1] + ... and nonce number npub[0],npub[1],... + ... and secret key k[0],k[1],... + ... + ... 
return 0; + */ + unsigned long long mlen_; + unsigned char tag_is_match; + + (void)nsec; + if (clen < CRYPTO_ABYTES) { + return -1; + } + mlen_ = clen - CRYPTO_ABYTES; + + tag_is_match = crypto_aead_decrypt_asm(m, c, mlen_, ad, adlen, npub, k); + + if (tag_is_match != 0) + { + memset(m, 0, (size_t)mlen_); + return -1; + } + + *mlen = mlen_; + return 0; +} \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot256v1/avr8_speed/encrypt_core.S b/knot/Implementations/crypto_hash/knot256v1/avr8_speed/encrypt_core.S new file mode 100644 index 0000000..bd74f93 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v1/avr8_speed/encrypt_core.S @@ -0,0 +1,555 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.1 2020 by KNOT Team * +; ********************************************** +; + +; +; ============================================ +; S R A M D E F I N I T I O N S +; ============================================ +; +#include +#include "config.h" + +.section .noinit + SRAM_STATE: .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#if (STATE_INBYTES > 32) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif +#if (STATE_INBYTES > 48) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif + SRAM_MESSAGE_OUT_ADDR: .BYTE 0, 0 + SRAM_MESSAGE_IN_ADDR: .BYTE 0, 0 + SRAM_MESSAGE_IN_LEN: .BYTE 0, 0 +#ifdef CRYPTO_AEAD +; For CRYPTO_AEAD + SRAM_ASSOCIATED_DATA_ADDR: .BYTE 0, 0 + SRAM_ADLEN: .BYTE 0, 0 + SRAM_NONCE_ADDR: .BYTE 0, 0 + SRAM_KEY_ADDR: .BYTE 0, 0 + + SRAM_ADDITIONAL: + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#if (CRYPTO_ABYTES > 16) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif +#if (CRYPTO_ABYTES > 24) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 
+#endif + +#endif + +.section .text + +#include "permutation.h" + +; require YH:YL be the address of the current associated data/cipher/message block +; for enc and dec, store ciphertext or plaintext +; require ZH:ZL be the address of the current cipher/message block +.macro XOR_to_State_ENCDEC + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + mov cnt0, rate +XOR_to_State_loop_ENCDEC: + ld tmp0, Y+ ; plaintext/ciphertext + ld tmp1, X ; state + eor tmp1, tmp0 ; ciphertext/plaintext + st Z+, tmp1 ; store ciphertext/plaintext + sbrc AEDH, 1 ; test auth/enc or dec, if AEDH[1] == 0, skip repalce state byte + mov tmp1, tmp0 ; if dec, replace state + st X+, tmp1 ; store state byte + dec cnt0 + brne XOR_to_State_loop_ENCDEC +; YH:YL are now the address of the next associated data block +.endm + +; require YH:YL be the address of the current associated data/cipher/message block +; for enc and dec, store ciphertext or plaintext +; require ZH:ZL be the address of the current cipher/message block +.macro XOR_to_State_AUTH + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + mov cnt0, rate +XOR_to_State_loop_AUTH: + ld tmp0, Y+ ; plaintext/ciphertext + ld tmp1, X ; state + eor tmp1, tmp0 ; ciphertext/plaintext + st X+, tmp1 ; store state byte + dec cnt0 + brne XOR_to_State_loop_AUTH +; YH:YL are now the address of the next associated data block +.endm + + + +; require YH:YL pointed to the input data +; require ZH:ZL pointed to the output data +; require cnt0 containes the nubmer of bytes in source data +; require number of bytes in source data less than rate, i.e., 0 <= cnt0 < rate +; +; the 0th bit in AEDH is used to distinguish (auth AD) or (enc/dec M/C): +; AEDH[0] = 0 for (auth AD), AEDH[0] = 1 for (enc/dec M/C) +; the 1th bit in AEDH is used to distinguish (auth AD/enc M) or (dec C): +; AEDH[1] = 0 for (auth AD/enc M), AEDH[1] = 1 for (dec C) +; AEDH = 0b000 for (auth AD) +; AEDH = 0b001 for (enc M) +; AEDH = 0b011 for (dec C) +Pad_XOR_to_State: + ldi XH, 
hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + tst cnt0 + breq XOR_padded_data +XOR_source_data_loop: + ld tmp0, Y+ ; plaintext/ciphertext + ld tmp1, X ; state + eor tmp1, tmp0 ; ciphertext/plaintext + sbrc AEDH, 0 ; test auth or enc/dec, if AEDH[0] == 0, skip store result + st Z+, tmp1 ; store ciphertext/plaintext + sbrc AEDH, 1 ; test auth/enc or dec, if AEDH[1] == 0, skip repalce state byte + mov tmp1, tmp0 ; if dec, replace state + st X+, tmp1 ; store state byte + dec cnt0 + brne XOR_source_data_loop +XOR_padded_data: + ldi tmp0, PAD_BITS + ld tmp1, X + eor tmp1, tmp0 + st X, tmp1 +ret + +AddDomain: + ldi XH, hi8(SRAM_STATE + STATE_INBYTES - 1) + ldi XL, lo8(SRAM_STATE + STATE_INBYTES - 1) + ldi tmp0, DOMAIN_BITS + ld tmp1, X + eor tmp0, tmp1 + st X, tmp0 +ret + +; require ZH:ZL be the address of the destination +EXTRACT_from_State: + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + mov tmp1, rate +EXTRACT_from_State_loop: + ld tmp0, X+ + st Z+, tmp0 + dec tmp1 + brne EXTRACT_from_State_loop +ret + +AUTH: + tst radlen + breq AUTH_end + + cp radlen, rate + brlo auth_ad_padded_block + +auth_ad_loop: + XOR_to_State_AUTH + rcall Permutation + sub radlen, rate + cp radlen, rate + brlo auth_ad_padded_block + rjmp auth_ad_loop + +auth_ad_padded_block: + mov cnt0, radlen + rcall Pad_XOR_to_State + rcall Permutation + +AUTH_end: +ret + +#ifdef CRYPTO_AEAD +Initialization: + ldi rn, NR_0 + ldi XL, lo8(SRAM_STATE) + ldi XH, hi8(SRAM_STATE) + + lds YH, SRAM_NONCE_ADDR + lds YL, SRAM_NONCE_ADDR + 1 + ldi cnt0, CRYPTO_NPUBBYTES +load_nonce_loop: + ld tmp0, Y+ + st X+, tmp0 + dec cnt0 + brne load_nonce_loop + + lds YH, SRAM_KEY_ADDR + lds YL, SRAM_KEY_ADDR + 1 + ldi cnt0, CRYPTO_KEYBYTES +load_key_loop: + ld tmp0, Y+ + st X+, tmp0 + dec cnt0 + brne load_key_loop + +#if (STATE_INBITS==384) && (RATE_INBITS==192) + ldi cnt0, (STATE_INBYTES - CRYPTO_NPUBBYTES - CRYPTO_KEYBYTES - 1) + clr tmp0 +empty_state_loop: + st X+, tmp0 + dec cnt0 + brne empty_state_loop + ldi tmp0, 
S384_R192_BITS + st X+, tmp0 +#endif + + rcall Permutation +ret + +ENC: + tst mclen + breq ENC_end + + cp mclen, rate + brlo enc_padded_block + +enc_loop: + XOR_to_State_ENCDEC + ldi rn, NR_i + rcall Permutation + sub mclen, rate + cp mclen, rate + brlo enc_padded_block + rjmp enc_loop + +enc_padded_block: + mov cnt0, mclen + rcall Pad_XOR_to_State +ENC_end: +ret + +Finalization: + ldi rate, SQUEEZE_RATE_INBYTES + ldi rn, NR_f + rcall Permutation + rcall EXTRACT_from_State +ret + +; void crypto_aead_encrypt_asm( +; unsigned char *c, +; const unsigned char *m, +; unsigned long long mlen, +; const unsigned char *ad, +; unsigned long long adlen, +; const unsigned char *npub, +; const unsigned char *k +; ) +; +; unsigned char *c, is passed in r24:r25 +; const unsigned char *m, is passed in r22:r23 +; unsigned long long mlen, is passed in r20:r21, only LSB (r20) is used +; const unsigned char *ad, is passed in r18:r19 +; unsigned long long adlen, is passed in r16:r17, only LSB (r16) is used +; const unsigned char *npub, is passed in r14:r15 +; const unsigned char *k is passed in r12:r13 +.global crypto_aead_encrypt_asm +crypto_aead_encrypt_asm: + PUSH_ALL + ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR) + ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR) + st X+, r25 ;store cipher address in SRAM_MESSAGE_OUT_ADDR + st X+, r24 + st X+, r23 ;store message address in SRAM_MESSAGE_IN_ADDR + st X+, r22 + st X+, r21 ;store message length in SRAM_MESSAGE_IN_LEN + st X+, r20 + st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR + st X+, r18 + st X+, r17 ;store associated data length in SRAM_ADLEN + st X+, r16 + st X+, r15 ;store nonce address in SRAM_NONCE_ADDR + st X+, r14 + st X+, r13 ;store key address in SRAM_KEY_ADDR + st X+, r12 + mov radlen, r16 + mov mclen, r20 + + rcall Initialization + + ldi rn, NR_i + ldi rate, RATE_INBYTES + ldi AEDH, 0b000 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C) + lds YH, SRAM_ASSOCIATED_DATA_ADDR + lds YL, 
SRAM_ASSOCIATED_DATA_ADDR + 1 + rcall AUTH + rcall AddDomain + ldi AEDH, 0b001 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C) + lds YH, SRAM_MESSAGE_IN_ADDR + lds YL, SRAM_MESSAGE_IN_ADDR + 1 + lds ZH, SRAM_MESSAGE_OUT_ADDR + lds ZL, SRAM_MESSAGE_OUT_ADDR + 1 + rcall ENC + rcall Finalization + POP_ALL +ret + +; int crypto_aead_decrypt_asm( +; unsigned char *m, +; const unsigned char *c, +; unsigned long long clen, +; const unsigned char *ad, +; unsigned long long adlen, +; const unsigned char *npub, +; const unsigned char *k +; ) +; +; unsigned char *m, is passed in r24:r25 +; const unsigned char *c, is passed in r22:r23 +; unsigned long long clen, is passed in r20:r21, only LSB (r20) is used +; const unsigned char *ad, is passed in r18:r19 +; unsigned long long adlen, is passed in r16:r17, only LSB (r16) is used +; const unsigned char *npub, is passed in r14:r15 +; const unsigned char *k is passed in r12:r13 +.global crypto_aead_decrypt_asm +crypto_aead_decrypt_asm: + PUSH_ALL + ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR) + ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR) + st X+, r25 ;store message address in SRAM_MESSAGE_OUT_ADDR + st X+, r24 + st X+, r23 ;store cipher address in SRAM_MESSAGE_IN_ADDR + st X+, r22 + st X+, r21 ;store cipher length in SRAM_MESSAGE_IN_LEN + st X+, r20 + st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR + st X+, r18 + st X+, r17 ;store associated data length in SRAM_ADLEN + st X+, r16 + st X+, r15 ;store nonce address in SRAM_NONCE_ADDR + st X+, r14 + st X+, r13 ;store key address in SRAM_KEY_ADDR + st X+, r12 + mov radlen, r16 + mov mclen, r20 + + rcall Initialization + + ldi rn, NR_i + ldi rate, RATE_INBYTES + ldi AEDH, 0b000 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C) + lds YH, SRAM_ASSOCIATED_DATA_ADDR + lds YL, SRAM_ASSOCIATED_DATA_ADDR + 1 + rcall AUTH + rcall AddDomain + ldi AEDH, 0b011 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for 
(dec C) + lds YH, SRAM_MESSAGE_IN_ADDR + lds YL, SRAM_MESSAGE_IN_ADDR + 1 + lds ZH, SRAM_MESSAGE_OUT_ADDR + lds ZL, SRAM_MESSAGE_OUT_ADDR + 1 + rcall ENC + + ldi ZH, hi8(SRAM_ADDITIONAL) + ldi ZL, lo8(SRAM_ADDITIONAL) + rcall Finalization + + sbiw ZL, CRYPTO_ABYTES + ldi cnt0, CRYPTO_ABYTES +compare_tag: + ld tmp0, Z+ + ld tmp1, Y+ + cp tmp0, tmp1 + brne return_tag_not_match + dec cnt0 + brne compare_tag + rjmp return_tag_match + +return_tag_not_match: + ldi r25, 0xFF + ldi r24, 0xFF + rjmp crypto_aead_decrypt_end + +return_tag_match: + clr r25 + clr r24 +crypto_aead_decrypt_end: + POP_ALL +ret + +; #ifdef CRYPTO_AEAD +#endif + + +#ifdef CRYPTO_HASH + +; void crypto_hash_asm( +; unsigned char *out, +; const unsigned char *in, +; unsigned long long inlen +; ) +; +; unsigned char *out, is passed in r24:r25 +; const unsigned char *in, is passed in r22:r23 +; unsigned long long inlen, is passed in r20:r21, only LSB (r20) is used +.global crypto_hash_asm +crypto_hash_asm: + PUSH_ALL + ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR) + ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR) + st X+, r25 ;store message address in SRAM_MESSAGE_OUT_ADDR + st X+, r24 + st X+, r23 ;store cipher address in SRAM_MESSAGE_IN_ADDR + st X+, r22 + st X+, r21 ;store cipher length in SRAM_MESSAGE_IN_LEN + st X+, r20 + mov mclen, r20 + + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) +#if (STATE_INBITS==384) && (HASH_RATE_INBITS==128) + ldi cnt0, STATE_INBYTES - 1 +#else + ldi cnt0, STATE_INBYTES +#endif + clr tmp0 +zero_state: + st X+, tmp0 + dec cnt0 + brne zero_state + +#if (STATE_INBITS==384) && (HASH_RATE_INBITS==128) + ldi tmp0, S384_R192_BITS + st X+, tmp0 +#endif + + ldi rn, NR_h + ldi AEDH, 0b100 + +HASH_ABSORBING: + mov radlen, mclen + tst radlen + breq EMPTY_M + ldi rate, HASH_RATE_INBYTES + lds YH, SRAM_MESSAGE_IN_ADDR + lds YL, SRAM_MESSAGE_IN_ADDR + 1 + rcall AUTH + rjmp HASH_SQUEEZING + +EMPTY_M: + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + ldi tmp0, PAD_BITS + ld tmp1, X + eor tmp1, tmp0 + st 
X, tmp1 + rcall Permutation + +HASH_SQUEEZING: + ldi rate, HASH_SQUEEZE_RATE_INBYTES + lds ZH, SRAM_MESSAGE_OUT_ADDR + lds ZL, SRAM_MESSAGE_OUT_ADDR + 1 + ldi tcnt, CRYPTO_BYTES +SQUEEZING_loop: + rcall EXTRACT_from_State + subi tcnt, HASH_SQUEEZE_RATE_INBYTES + breq HASH_SQUEEZING_end + rcall Permutation + rjmp SQUEEZING_loop +HASH_SQUEEZING_end: + POP_ALL +ret + +#endif + + +; Byte Order In AVR 8: +; KNOT-AEAD(128, 256, 64): +; N[ 0] AEAD_State[ 0] | Message[ 0] Perm_row_0[0] 0 Tag[ 0] +; N[ 1] AEAD_State[ 1] | Message[ 1] Perm_row_0[1] 0 Tag[ 1] +; N[ 2] AEAD_State[ 2] | Message[ 2] Perm_row_0[2] 0 Tag[ 2] +; N[ 3] AEAD_State[ 3] | Message[ 3] Perm_row_0[3] 0 Tag[ 3] +; N[ 4] AEAD_State[ 4] | Message[ 4] 0x01 Perm_row_0[4] 0 Tag[ 4] +; N[ 5] AEAD_State[ 5] | Message[ 5] 0x00 Perm_row_0[5] 0 Tag[ 5] +; N[ 6] AEAD_State[ 6] | Message[ 6] 0x00 Perm_row_0[6] 0 Tag[ 6] +; N[ 7] AEAD_State[ 7] | Message[ 7] 0x00 Perm_row_0[7] <<< 0 Tag[ 7] +; N[ 8] AEAD_State[ 8] | Perm_row_1[0] 1 +; N[ 9] AEAD_State[ 9] | Perm_row_1[1] 1 +; N[10] AEAD_State[10] | Perm_row_1[2] 1 +; N[11] AEAD_State[11] | Perm_row_1[3] 1 +; N[12] AEAD_State[12] | Perm_row_1[4] 1 +; N[13] AEAD_State[13] | Perm_row_1[5] 1 +; N[14] AEAD_State[14] | Perm_row_1[6] 1 +; N[15] AEAD_State[15] | Perm_row_1[7] <<< 1 +; K[ 0] AEAD_State[16] | Perm_row_2[0] 8 +; K[ 1] AEAD_State[17] | Perm_row_2[1] 8 +; K[ 2] AEAD_State[18] | Perm_row_2[2] 8 +; K[ 3] AEAD_State[19] | Perm_row_2[3] 8 +; K[ 4] AEAD_State[20] | Perm_row_2[4] 8 +; K[ 5] AEAD_State[21] | Perm_row_2[5] 8 +; K[ 6] AEAD_State[22] | Perm_row_2[6] 8 +; K[ 7] AEAD_State[23] | Perm_row_2[7] <<< 8 +; K[ 8] AEAD_State[24] | Perm_row_3[0] 25 +; K[ 9] AEAD_State[25] | Perm_row_3[1] 25 +; K[10] AEAD_State[26] | Perm_row_3[2] 25 +; K[11] AEAD_State[27] | Perm_row_3[3] 25 +; K[12] AEAD_State[28] | Perm_row_3[4] 25 +; K[13] AEAD_State[29] | Perm_row_3[5] 25 +; K[14] AEAD_State[30] | Perm_row_3[6] 25 +; K[15] AEAD_State[31] | ^0x80 Perm_row_3[7] <<< 25 +; +; +; 
KNOT-AEAD(128, 384, 192): +; Initalization +; N[ 0] AEAD_State[ 0] | Message[ 0] Perm_row_0[ 0] 0 Tag[ 0] +; N[ 1] AEAD_State[ 1] | Message[ 1] Perm_row_0[ 1] 0 Tag[ 1] +; N[ 2] AEAD_State[ 2] | Message[ 2] Perm_row_0[ 2] 0 Tag[ 2] +; N[ 3] AEAD_State[ 3] | Message[ 3] Perm_row_0[ 3] 0 Tag[ 3] +; N[ 4] AEAD_State[ 4] | Message[ 4] 0x01 Perm_row_0[ 4] 0 Tag[ 4] +; N[ 5] AEAD_State[ 5] | Message[ 5] 0x00 Perm_row_0[ 5] 0 Tag[ 5] +; N[ 6] AEAD_State[ 6] | Message[ 6] 0x00 Perm_row_0[ 6] 0 Tag[ 6] +; N[ 7] AEAD_State[ 7] | Message[ 7] 0x00 Perm_row_0[ 7] 0 Tag[ 7] +; N[ 8] AEAD_State[ 8] | Message[ 8] 0x00 Perm_row_0[ 8] 0 Tag[ 8] +; N[ 9] AEAD_State[ 9] | Message[ 9] 0x00 Perm_row_0[ 9] 0 Tag[ 9] +; N[10] AEAD_State[10] | Message[10] 0x00 Perm_row_0[10] 0 Tag[10] +; N[11] AEAD_State[11] | Message[11] 0x00 Perm_row_0[11] <<< 0 Tag[11] +; N[12] AEAD_State[12] | Message[12] 0x00 Perm_row_1[ 0] 1 Tag[12] +; N[13] AEAD_State[13] | Message[13] 0x00 Perm_row_1[ 1] 1 Tag[13] +; N[14] AEAD_State[14] | Message[14] 0x00 Perm_row_1[ 2] 1 Tag[14] +; N[15] AEAD_State[15] | Message[15] 0x00 Perm_row_1[ 3] 1 Tag[15] +; K[ 0] AEAD_State[16] | Message[16] 0x00 Perm_row_1[ 4] 1 +; K[ 1] AEAD_State[17] | Message[17] 0x00 Perm_row_1[ 5] 1 +; K[ 2] AEAD_State[18] | Message[18] 0x00 Perm_row_1[ 6] 1 +; K[ 3] AEAD_State[19] | Message[19] 0x00 Perm_row_1[ 7] 1 +; K[ 4] AEAD_State[20] | Message[20] 0x00 Perm_row_1[ 8] 1 +; K[ 5] AEAD_State[21] | Message[21] 0x00 Perm_row_1[ 9] 1 +; K[ 6] AEAD_State[22] | Message[22] 0x00 Perm_row_1[10] 1 +; K[ 7] AEAD_State[23] | Message[23] 0x00 Perm_row_1[11] <<< 1 +; K[ 8] AEAD_State[24] | Perm_row_2[ 0] 8 +; K[ 9] AEAD_State[25] | Perm_row_2[ 1] 8 +; K[10] AEAD_State[26] | Perm_row_2[ 2] 8 +; K[11] AEAD_State[27] | Perm_row_2[ 3] 8 +; K[12] AEAD_State[28] | Perm_row_2[ 4] 8 +; K[13] AEAD_State[29] | Perm_row_2[ 5] 8 +; K[14] AEAD_State[30] | Perm_row_2[ 6] 8 +; K[15] AEAD_State[31] | Perm_row_2[ 7] 8 +; 0x00 AEAD_State[32] | Perm_row_2[ 8] 8 +; 0x00 
AEAD_State[33] | Perm_row_2[ 9] 8 +; 0x00 AEAD_State[34] | Perm_row_2[10] 8 +; 0x00 AEAD_State[35] | Perm_row_2[11] <<< 8 +; 0x00 AEAD_State[36] | Perm_row_3[ 0] 55 +; 0x00 AEAD_State[37] | Perm_row_3[ 1] 55 +; 0x00 AEAD_State[38] | Perm_row_3[ 2] 55 +; 0x00 AEAD_State[39] | Perm_row_3[ 3] 55 +; 0x00 AEAD_State[40] | Perm_row_3[ 4] 55 +; 0x00 AEAD_State[41] | Perm_row_3[ 5] 55 +; 0x00 AEAD_State[42] | Perm_row_3[ 6] 55 +; 0x00 AEAD_State[43] | Perm_row_3[ 7] 55 +; 0x00 AEAD_State[44] | Perm_row_3[ 8] 55 +; 0x00 AEAD_State[45] | Perm_row_3[ 9] 55 +; 0x00 AEAD_State[46] | Perm_row_3[10] 55 +; 0x00 ^0x80 AEAD_State[47] | ^0x80 Perm_row_3[11] <<< 55 diff --git a/knot/Implementations/crypto_hash/knot256v1/avr8_speed/hash.c b/knot/Implementations/crypto_hash/knot256v1/avr8_speed/hash.c new file mode 100644 index 0000000..dbbe4df --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v1/avr8_speed/hash.c @@ -0,0 +1,32 @@ +#include +#include +#include +#include +#include "api.h" +#include "crypto_hash.h" + +extern void crypto_hash_asm( + unsigned char *out, + const unsigned char *in, + unsigned char inlen + ); + +int crypto_hash( + unsigned char *out, + const unsigned char *in, + unsigned long long inlen +) +{ + /* + ... + ... the code for the hash function implementation goes here + ... generating a hash value out[0],out[1],...,out[CRYPTO_BYTES-1] + ... from a message in[0],in[1],...,in[in-1] + ... + ... 
return 0; + */ + + crypto_hash_asm(out, in, inlen); + + return 0; +} \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot256v1/avr8_speed/knot256.h b/knot/Implementations/crypto_hash/knot256v1/avr8_speed/knot256.h new file mode 100644 index 0000000..f99f68b --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v1/avr8_speed/knot256.h @@ -0,0 +1,306 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.1 2020 by KNOT Team * +; ********************************************** +; +#define x10 r0 +#define x11 r1 +#define x12 r2 +#define x13 r3 +#define x14 r4 +#define x15 r5 +#define x16 r6 +#define x17 r7 + +; an intentionally arrangement of registers to facilitate movw +#define x20 r8 +#define x21 r10 +#define x22 r12 +#define x23 r14 +#define x24 r9 +#define x25 r11 +#define x26 r13 +#define x27 r15 + +; an intentionally arrangement of registers to facilitate movw +#define x30 r16 +#define x35 r18 +#define x32 r20 +#define x37 r22 +#define x34 r17 +#define x31 r19 +#define x36 r21 +#define x33 r23 + +#define t0j r24 +#define t1j r25 +#define x0j r27 + +#include "assist.h" + +.macro Sbox i0, i1, i2, i3 + mov t0j, \i1 + com \i0 + and \i1, \i0 + eor \i1, \i2 + or \i2, t0j + eor \i0, \i3 + eor \i2, \i0 + eor t0j, \i3 + and \i0, \i1 + eor \i3, \i1 + eor \i0, t0j + and t0j, \i2 + eor \i1, t0j +.endm + +Permutation: + PUSH_CONFLICT + mov rcnt, rn + + ldi YH, hi8(SRAM_STATE + ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + ROW_INBYTES) + ld x10, Y+ + ld x11, Y+ + ld x12, Y+ + ld x13, Y+ + ld x14, Y+ + ld x15, Y+ + ld x16, Y+ + ld x17, Y+ + ld x20, Y+ + ld x21, Y+ + ld x22, Y+ + ld x23, Y+ + ld x24, Y+ + ld x25, Y+ + ld x26, Y+ + ld x27, Y+ + ld x30, Y+ + ld x31, Y+ + ld x32, Y+ + ld x33, Y+ + ld x34, Y+ + ld x35, Y+ + ld x36, Y+ + ld x37, Y+ + +#if 
defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) + sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[2] = 1 for HASH + rjmp For_Hash +For_AEAD: + ldi ZL, lo8(RC_LFSR6) + ldi ZH, hi8(RC_LFSR6) + rjmp round_loop_start +For_Hash: + ldi ZL, lo8(RC_LFSR7) + ldi ZH, hi8(RC_LFSR7) +#elif defined(CRYPTO_AEAD) + ldi ZL, lo8(RC_LFSR6) + ldi ZH, hi8(RC_LFSR6) +#else + ldi ZL, lo8(RC_LFSR7) + ldi ZH, hi8(RC_LFSR7) +#endif + +round_loop_start: + ; AddRC + lpm t0j, Z+ + ldi YH, hi8(SRAM_STATE) + ldi YL, lo8(SRAM_STATE) + ld x0j, Y + eor x0j, t0j + + ; SubColumns + Sbox x0j, x10, x20, x30 + st Y+, x0j + ld x0j, Y + Sbox x0j, x11, x21, x31 + st Y+, x0j + ld x0j, Y + Sbox x0j, x12, x22, x32 + st Y+, x0j + ld x0j, Y + Sbox x0j, x13, x23, x33 + st Y+, x0j + ld x0j, Y + Sbox x0j, x14, x24, x34 + st Y+, x0j + ld x0j, Y + Sbox x0j, x15, x25, x35 + st Y+, x0j + ld x0j, Y + Sbox x0j, x16, x26, x36 + st Y+, x0j + ld x0j, Y + Sbox x0j, x17, x27, x37 + st Y, x0j + + ; ShiftRows + ; <<< 1 + mov t0j, x17 + rol t0j + rol x10 + rol x11 + rol x12 + rol x13 + rol x14 + rol x15 + rol x16 + rol x17 + + ; <<< 8 + ; 7 6 5 4 3 2 1 0 => 6 5 4 3 2 1 0 7 + ;mov t0j, x27 + ;mov x27, x26 + ;mov x26, x25 + ;mov x25, x24 + ;mov x24, x23 + ;mov x23, x22 + ;mov x22, x21 + ;mov x21, x20 + ;mov x20, t0j + ; an intentionally arrangement of registers to facilitate movw + movw t0j, x23 ; t1j:t0j <= x27:x23 + movw x23, x22 ; x27:x23 <= x26:x22 + movw x22, x21 ; x26:x22 <= x25:x21 + movw x21, x20 ; x25:x21 <= x24:x20 + mov x20, t1j ; x20 <= t1j + mov x24, t0j ; x24 <= t0j + + ; <<< 1 + mov t0j, x37 + rol t0j + rol x30 + rol x31 + rol x32 + rol x33 + rol x34 + rol x35 + rol x36 + rol x37 + ; <<< 24 + ; 7 6 5 4 3 2 1 0 => 4 3 2 1 0 7 6 5 + ;mov t0j, x30 + ;mov x30, x35 + ;mov x35, x32 + ;mov x32, x37 + ;mov x37, x34 + ;mov x34, x31 + ;mov x31, x36 + ;mov x36, x33 + ;mov x33, t0j + ; an intentionally arrangement of registers to facilitate movw + ;x30 r16 + ;x35 r18 + ;x32 r20 + ;x37 r22 + ;x34 r17 + ;x31 r19 + ;x36 r21 + ;x33 r23 
+ movw t0j, x30 ; t1j:t0j <= x34:x30 + movw x30, x35 ; x34:x30 <= x31:x35 + movw x35, x32 ; x31:x35 <= x36:x32 + movw x32, x37 ; x36:x32 <= x33:x37 + mov x37, t1j ; x37 <= x34 + mov x33, t0j ; x33 <= x30 + + dec rcnt + breq round_loop_end + jmp round_loop_start + +round_loop_end: + ldi YH, hi8(SRAM_STATE + ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + ROW_INBYTES) + st Y+, x10 + st Y+, x11 + st Y+, x12 + st Y+, x13 + st Y+, x14 + st Y+, x15 + st Y+, x16 + st Y+, x17 + st Y+, x20 + st Y+, x21 + st Y+, x22 + st Y+, x23 + st Y+, x24 + st Y+, x25 + st Y+, x26 + st Y+, x27 + st Y+, x30 + st Y+, x31 + st Y+, x32 + st Y+, x33 + st Y+, x34 + st Y+, x35 + st Y+, x36 + st Y+, x37 + + POP_CONFLICT +ret + + +.section .text +#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) +RC_LFSR6: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x21, 0x03, 0x06 +.byte 0x0c, 0x18, 0x31, 0x22, 0x05, 0x0a, 0x14, 0x29 +.byte 0x13, 0x27, 0x0f, 0x1e, 0x3d, 0x3a, 0x34, 0x28 +.byte 0x11, 0x23, 0x07, 0x0e, 0x1c, 0x39, 0x32, 0x24 +.byte 0x09, 0x12, 0x25, 0x0b, 0x16, 0x2d, 0x1b, 0x37 +.byte 0x2e, 0x1d, 0x3b, 0x36, 0x2c, 0x19, 0x33, 0x26 +.byte 0x0d, 0x1a, 0x35, 0x2a, 0x15, 0x2b, 0x17, 0x2f +.byte 0x1f, 0x3f, 0x3e, 0x3c, 0x38, 0x30, 0x20, 0x00 +RC_LFSR7: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03 +.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a +.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c +.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b +.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a +.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f +.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43 +.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09 +.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36 +.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37 +.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31 +.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25 +.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c +.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a +.byte 0x15, 0x2a, 0x55, 
0x2b, 0x57, 0x2f, 0x5f, 0x3f +.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00 +#elif defined(CRYPTO_AEAD) +RC_LFSR6: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x21, 0x03, 0x06 +.byte 0x0c, 0x18, 0x31, 0x22, 0x05, 0x0a, 0x14, 0x29 +.byte 0x13, 0x27, 0x0f, 0x1e, 0x3d, 0x3a, 0x34, 0x28 +.byte 0x11, 0x23, 0x07, 0x0e, 0x1c, 0x39, 0x32, 0x24 +.byte 0x09, 0x12, 0x25, 0x0b, 0x16, 0x2d, 0x1b, 0x37 +.byte 0x2e, 0x1d, 0x3b, 0x36, 0x2c, 0x19, 0x33, 0x26 +.byte 0x0d, 0x1a, 0x35, 0x2a, 0x15, 0x2b, 0x17, 0x2f +.byte 0x1f, 0x3f, 0x3e, 0x3c, 0x38, 0x30, 0x20, 0x00 +#else +RC_LFSR7: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03 +.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a +.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c +.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b +.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a +.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f +.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43 +.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09 +.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36 +.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37 +.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31 +.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25 +.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c +.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a +.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f +.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00 +#endif \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot256v1/avr8_speed/knot384.h b/knot/Implementations/crypto_hash/knot256v1/avr8_speed/knot384.h new file mode 100644 index 0000000..0b3dd75 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v1/avr8_speed/knot384.h @@ -0,0 +1,261 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 
1.1 2020 by KNOT Team * +; ********************************************** +; + +; an intentionally arrangement of registers to facilitate movw +#define x20 r0 +#define x21 r2 +#define x22 r4 +#define x23 r6 +#define x24 r8 +#define x25 r10 +#define x26 r1 +#define x27 r3 +#define x28 r5 +#define x29 r7 +#define x2a r9 +#define x2b r11 + +; an intentionally arrangement of registers to facilitate movw +#define x30 r22 +#define x35 r20 +#define x3a r18 +#define x33 r16 +#define x38 r14 +#define x31 r12 +#define x36 r23 +#define x3b r21 +#define x34 r19 +#define x39 r17 +#define x32 r15 +#define x37 r13 + +#define t0j r24 +#define t1j r25 +#define x0j r25 +#define x1j r27 + +#include "assist.h" + +.macro Sbox i0, i1, i2, i3 + ldi t0j, 0xFF + eor \i0, t0j + mov t0j, \i1 + and \i1, \i0 + eor \i1, \i2 + or \i2, t0j + eor \i0, \i3 + eor \i2, \i0 + eor t0j, \i3 + and \i0, \i1 + eor \i3, \i1 + eor \i0, t0j + and t0j, \i2 + eor \i1, t0j +.endm + +.macro OneColumn i0, i1, i2, i3 + ld \i0, Y + ldd \i1, Y + ROW_INBYTES + Sbox \i0, \i1, \i2, \i3 + st Y+, \i0 + rol \i1 ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES -1, \i1 +.endm + +Permutation: + PUSH_CONFLICT + mov rcnt, rn + + ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES) + ld x20, Y+ + ld x21, Y+ + ld x22, Y+ + ld x23, Y+ + ld x24, Y+ + ld x25, Y+ + ld x26, Y+ + ld x27, Y+ + ld x28, Y+ + ld x29, Y+ + ld x2a, Y+ + ld x2b, Y+ + ld x30, Y+ + ld x31, Y+ + ld x32, Y+ + ld x33, Y+ + ld x34, Y+ + ld x35, Y+ + ld x36, Y+ + ld x37, Y+ + ld x38, Y+ + ld x39, Y+ + ld x3a, Y+ + ld x3b, Y+ + + ldi ZL, lo8(RC_LFSR7) + ldi ZH, hi8(RC_LFSR7) + +round_loop_start: + ; AddRC + lpm t0j, Z+ + ldi YH, hi8(SRAM_STATE) + ldi YL, lo8(SRAM_STATE) + ld x0j, Y + eor x0j, t0j + + ldd x1j, Y + ROW_INBYTES + Sbox x0j, x1j, x20, x30 + st Y+, x0j + lsl x1j ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES -1, x1j + + OneColumn x0j, x1j, x21, x31 + OneColumn x0j, x1j, x22, x32 + OneColumn x0j, x1j, x23, x33 + OneColumn 
x0j, x1j, x24, x34 + OneColumn x0j, x1j, x25, x35 + OneColumn x0j, x1j, x26, x36 + OneColumn x0j, x1j, x27, x37 + OneColumn x0j, x1j, x28, x38 + OneColumn x0j, x1j, x29, x39 + OneColumn x0j, x1j, x2a, x3a + OneColumn x0j, x1j, x2b, x3b + + ld x1j, Y + eor t0j, t0j + adc x1j, t0j + st Y, x1j + + ; b a 9 8 7 6 5 4 3 2 1 0 + ; -- -- -- -- -- -- -- -- -- -- -- x- 0 + ; -- -- -- -- -- -- -- -- -- -- -- x' 0 + ; -- -- -- -- -- -- -- -- -- -- x- -- 1 + ; -- -- -- -- x' -- -- -- -- -- -- -- 7 + ; 4 3 2 1 0 b a 9 8 7 6 5 + + ; ShiftRows -- the last two rows + ; <<< 8 + ; b a 9 8 7 6 5 4 3 2 1 0 => a 9 8 7 6 5 4 3 2 1 0 b + movw t0j, x25 ; t1j:t0j <= x2b:x25 + movw x25, x24 ; x2b:x25 <= x2a:x24 + movw x24, x23 ; x2a:x24 <= x29:x23 + movw x23, x22 ; x29:x23 <= x28:x22 + movw x22, x21 ; x28:x22 <= x27:x21 + movw x21, x20 ; x27:x21 <= x26:x20 + mov x26, t0j ; x26 <= x25 + mov x20, t1j ; x20 <= x2b + + ; >>> 1 + mov t0j, x3b + ror t0j + ror x3a + ror x39 + ror x38 + ror x37 + ror x36 + ror x35 + ror x34 + ror x33 + ror x32 + ror x31 + ror x30 + ror x3b + ; <<< 56 + ; b a 9 8 7 6 5 4 3 2 1 0 => 4 3 2 1 0 b a 9 8 7 6 5 + ; mov x3j, x30 + ; mov x30, x35 + ; mov x35, x3a + ; mov x3a, x33 + ; mov x33, x38 + ; mov x38, x31 + ; mov x31, x36 + ; mov x36, x3b + ; mov x3b, x34 + ; mov x34, x39 + ; mov x39, x32 + ; mov x32, x37 + ; mov x37, x3j + ; an intentionally arrangement of registers to facilitate movw + ; x30 r22 + ; x35 r20 + ; x3a r18 + ; x33 r16 + ; x38 r14 + ; x31 r12 + ; x36 r23 + ; x3b r21 + ; x34 r19 + ; x39 r17 + ; x32 r15 + ; x37 r13 + movw t0j, x30 ; t1j:t0j <= x36:x30 + movw x30, x35 ; x36:x30 <= x3b:x35 + movw x35, x3a ; x3b:x35 <= x34:x3a + movw x3a, x33 ; x34:x3a <= x39:x33 + movw x33, x38 ; x39:x33 <= x32:x38 + movw x38, x31 ; x32:x38 <= x37:x31 + mov x31, t1j ; x31 <= x36 + mov x37, t0j ; x37 <= x30 + + dec rcnt + breq round_loop_end + jmp round_loop_start + +round_loop_end: + + ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES) + 
st Y+, x20 + st Y+, x21 + st Y+, x22 + st Y+, x23 + st Y+, x24 + st Y+, x25 + st Y+, x26 + st Y+, x27 + st Y+, x28 + st Y+, x29 + st Y+, x2a + st Y+, x2b + st Y+, x30 + st Y+, x31 + st Y+, x32 + st Y+, x33 + st Y+, x34 + st Y+, x35 + st Y+, x36 + st Y+, x37 + st Y+, x38 + st Y+, x39 + st Y+, x3a + st Y+, x3b + + POP_CONFLICT +ret + +RC_LFSR7: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03 +.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a +.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c +.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b +.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a +.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f +.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43 +.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09 +.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36 +.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37 +.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31 +.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25 +.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c +.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a +.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f +.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00 \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot256v1/avr8_speed/knot512.h b/knot/Implementations/crypto_hash/knot256v1/avr8_speed/knot512.h new file mode 100644 index 0000000..b0e4319 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v1/avr8_speed/knot512.h @@ -0,0 +1,435 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.1 2020 by KNOT Team * +; ********************************************** +; +#define x20 r0 +#define x22 r2 +#define x24 r4 +#define x26 r6 +#define x28 r1 +#define x2a r3 +#define x2c r5 +#define x2e r7 + +#define x30 r8 +#define x3d 
r10 +#define x3a r12 +#define x37 r14 +#define x34 r16 +#define x31 r18 +#define x3e r20 +#define x3b r22 +#define x38 r9 +#define x35 r11 +#define x32 r13 +#define x3f r15 +#define x3c r17 +#define x39 r19 +#define x36 r21 +#define x33 r23 + +#define t0j r24 +#define t1j r25 +#define x0j r25 +#define x1j r27 +#define x2j r26 + +#include "assist.h" + +.macro Sbox i0, i1, i2, i3 + ldi t0j, 0xFF + eor \i0, t0j + mov t0j, \i1 + and \i1, \i0 + eor \i1, \i2 + or \i2, t0j + eor \i0, \i3 + eor \i2, \i0 + eor t0j, \i3 + and \i0, \i1 + eor \i3, \i1 + eor \i0, t0j + and t0j, \i2 + eor \i1, t0j +.endm + +.macro TwoColumns i2_e, i3_e, i3_o + ; column 2i + ld x0j, Y + ldd x1j, Y + ROW_INBYTES + Sbox x0j, x1j, \i2_e, \i3_e + st Y+, x0j + rol x1j ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES - 1, x1j + + ; column 2i+1 + ld x0j, Y + ldd x1j, Y + ROW_INBYTES + Sbox x0j, x1j, x2j, \i3_o + st Y+, x0j + rol x1j ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES - 1, x1j + ldd t0j, Y + 2 * ROW_INBYTES + 1 + std Y + 2 * ROW_INBYTES + 1, x2j + mov x2j, t0j +.endm + +Permutation: + PUSH_CONFLICT + mov rcnt, rn + push rcnt + + ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES) + + ldd x20, Y + 0x00 + ldd x22, Y + 0x02 + ldd x24, Y + 0x04 + ldd x26, Y + 0x06 + ldd x28, Y + 0x08 + ldd x2a, Y + 0x0a + ldd x2c, Y + 0x0c + ldd x2e, Y + 0x0e + + adiw YL, ROW_INBYTES + + ld x30, Y+ + ld x31, Y+ + ld x32, Y+ + ld x33, Y+ + ld x34, Y+ + ld x35, Y+ + ld x36, Y+ + ld x37, Y+ + ld x38, Y+ + ld x39, Y+ + ld x3a, Y+ + ld x3b, Y+ + ld x3c, Y+ + ld x3d, Y+ + ld x3e, Y+ + ld x3f, Y+ + +#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) + sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[2] = 1 for HASH + rjmp For_Hash +For_AEAD: + ldi ZL, lo8(RC_LFSR7) + ldi ZH, hi8(RC_LFSR7) + rjmp round_loop_start +For_Hash: + ldi ZL, lo8(RC_LFSR8) + ldi ZH, hi8(RC_LFSR8) +#elif defined(CRYPTO_AEAD) + ldi ZL, lo8(RC_LFSR7) + ldi ZH, hi8(RC_LFSR7) +#else + ldi ZL, lo8(RC_LFSR8) + ldi ZH, 
hi8(RC_LFSR8) +#endif + + +round_loop_start: + ; AddRC + lpm t0j, Z+ + ldi YH, hi8(SRAM_STATE) + ldi YL, lo8(SRAM_STATE) + + ; column 0 + ld x0j, Y + eor x0j, t0j + ldd x1j, Y + ROW_INBYTES + Sbox x0j, x1j, x20, x30 + st Y+, x0j + lsl x1j ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES - 1, x1j + + ; column 1 + ld x0j, Y + ldd x1j, Y + ROW_INBYTES + ldd x2j, Y + 2 * ROW_INBYTES + Sbox x0j, x1j, x2j, x31 + st Y+, x0j + rol x1j ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES - 1, x1j + ldd t0j, Y + 2 * ROW_INBYTES + 1 + std Y + 2 * ROW_INBYTES + 1, x2j + mov x2j, t0j + + ; column 2, 3 + TwoColumns x22, x32, x33 + ; column 4, 5 + TwoColumns x24, x34, x35 + ; column 6, 7 + TwoColumns x26, x36, x37 + ; column 8, 9 + TwoColumns x28, x38, x39 + ; column 10, 11 + TwoColumns x2a, x3a, x3b + ; column 12, 13 + TwoColumns x2c, x3c, x3d + + ; column 14 + ld x0j, Y + ldd x1j, Y + ROW_INBYTES + Sbox x0j, x1j, x2e, x3e + st Y+, x0j + rol x1j ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES - 1, x1j + + ; column 15 + ld x0j, Y + ldd x1j, Y + ROW_INBYTES + Sbox x0j, x1j, x2j, x3f + st Y+, x0j + rol x1j ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES - 1, x1j + + ld x1j, Y + eor t0j, t0j + adc x1j, t0j + st Y, x1j + std Y + ROW_INBYTES + 1, x2j + + ; f e d c b a 9 8 7 6 5 4 3 2 1 0 + ; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x- 0 + ; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x' 0 + ; -- -- -- -- -- -- -- -- -- -- -- -- -- x- -- -- 2 + ; -- -- -- -- -- -- -- -- -- -- -- -- x' -- -- -- 3 + ; c b a 9 8 7 6 5 4 3 2 1 0 f e d + ; x2e x2c x2a x28 x26 x24 x22 x20 => x2c x2a x28 x26 x24 x22 x20 x2e + ;mov t0j, x2e + ;mov x2e, x2c + ;mov x2c, x2a + ;mov x2a, x28 + ;mov x28, x26 + ;mov x26, x24 + ;mov x24, x22 + ;mov x22, x20 + ;mov x20, t0j + ; an intentionally arrangement of registers to facilitate movw + movw t0j, x26 ; t1j:t0j <= x2e:x26 + movw x26, x24 ; x2e:x26 <= x2c:x24 + movw x24, x22 ; x2c:x24 <= x2a:x22 + movw x22, x20 ; x2a:x22 <= x28:x20 + mov x20, t1j ; x20 <= t1j + 
mov x28, t0j ; x28 <= t0j + + ; <<< 1 + mov t0j, x3f + rol t0j + rol x30 + rol x31 + rol x32 + rol x33 + rol x34 + rol x35 + rol x36 + rol x37 + rol x38 + rol x39 + rol x3a + rol x3b + rol x3c + rol x3d + rol x3e + rol x3f + ; <<< 24 + ; f e d c b a 9 8 7 6 5 4 3 2 1 0 => + ; c b a 9 8 7 6 5 4 3 2 1 0 f e d + ; mov x3j, x30 + ; mov x30, x3d + ; mov x3d, x3a + ; mov x3a, x37 + ; mov x37, x34 + ; mov x34, x31 + ; mov x31, x3e + ; mov x3e, x3b + ; mov x3b, x38 + ; mov x38, x35 + ; mov x35, x32 + ; mov x32, x3f + ; mov x3f, x3c + ; mov x3c, x39 + ; mov x39, x36 + ; mov x36, x33 + ; mov x33, x3j + ; an intentionally arrangement of registers to facilitate movw + ; x30 r8 + ; x3d r10 + ; x3a r12 + ; x37 r14 + ; x34 r16 + ; x31 r18 + ; x3e r20 + ; x3b r22 + ; x38 r9 + ; x35 r11 + ; x32 r13 + ; x3f r15 + ; x3c r17 + ; x39 r19 + ; x36 r21 + ; x33 r23 + movw t0j, x30 ; t1j:t0j <= x38:x30 + movw x30, x3d ; x38:x30 <= x35:x3d + movw x3d, x3a ; x35:x3d <= x32:x3a + movw x3a, x37 ; x32:x3a <= x3f:x37 + movw x37, x34 ; x3f:x37 <= x3c:x34 + movw x34, x31 ; x3c:x34 <= x39:x31 + movw x31, x3e ; x39:x31 <= x36:x3e + movw x3e, x3b ; x36:x3e <= x33:x3b + mov x3b, t1j ; x3b <= x38 + mov x33, t0j ; x33 <= x30 + + pop rcnt + dec rcnt + push rcnt + breq round_loop_end + rjmp round_loop_start + +round_loop_end: + pop rcnt + + ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES) + std Y + 0x00, x20 + std Y + 0x02, x22 + std Y + 0x04, x24 + std Y + 0x06, x26 + std Y + 0x08, x28 + std Y + 0x0a, x2a + std Y + 0x0c, x2c + std Y + 0x0e, x2e + adiw YL, ROW_INBYTES + st Y+, x30 + st Y+, x31 + st Y+, x32 + st Y+, x33 + st Y+, x34 + st Y+, x35 + st Y+, x36 + st Y+, x37 + st Y+, x38 + st Y+, x39 + st Y+, x3a + st Y+, x3b + st Y+, x3c + st Y+, x3d + st Y+, x3e + st Y+, x3f + + POP_CONFLICT +ret + +.section .text +#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) +RC_LFSR7: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03 +.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 
0x05, 0x0a +.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c +.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b +.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a +.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f +.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43 +.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09 +.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36 +.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37 +.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31 +.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25 +.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c +.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a +.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f +.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00 +RC_LFSR8: +.byte 0x01, 0x02, 0x04, 0x08, 0x11, 0x23, 0x47, 0x8e +.byte 0x1c, 0x38, 0x71, 0xe2, 0xc4, 0x89, 0x12, 0x25 +.byte 0x4b, 0x97, 0x2e, 0x5c, 0xb8, 0x70, 0xe0, 0xc0 +.byte 0x81, 0x03, 0x06, 0x0c, 0x19, 0x32, 0x64, 0xc9 +.byte 0x92, 0x24, 0x49, 0x93, 0x26, 0x4d, 0x9b, 0x37 +.byte 0x6e, 0xdc, 0xb9, 0x72, 0xe4, 0xc8, 0x90, 0x20 +.byte 0x41, 0x82, 0x05, 0x0a, 0x15, 0x2b, 0x56, 0xad +.byte 0x5b, 0xb6, 0x6d, 0xda, 0xb5, 0x6b, 0xd6, 0xac +.byte 0x59, 0xb2, 0x65, 0xcb, 0x96, 0x2c, 0x58, 0xb0 +.byte 0x61, 0xc3, 0x87, 0x0f, 0x1f, 0x3e, 0x7d, 0xfb +.byte 0xf6, 0xed, 0xdb, 0xb7, 0x6f, 0xde, 0xbd, 0x7a +.byte 0xf5, 0xeb, 0xd7, 0xae, 0x5d, 0xba, 0x74, 0xe8 +.byte 0xd1, 0xa2, 0x44, 0x88, 0x10, 0x21, 0x43, 0x86 +.byte 0x0d, 0x1b, 0x36, 0x6c, 0xd8, 0xb1, 0x63, 0xc7 +.byte 0x8f, 0x1e, 0x3c, 0x79, 0xf3, 0xe7, 0xce, 0x9c +.byte 0x39, 0x73, 0xe6, 0xcc, 0x98, 0x31, 0x62, 0xc5 +.byte 0x8b, 0x16, 0x2d, 0x5a, 0xb4, 0x69, 0xd2, 0xa4 +.byte 0x48, 0x91, 0x22, 0x45, 0x8a, 0x14, 0x29, 0x52 +.byte 0xa5, 0x4a, 0x95, 0x2a, 0x54, 0xa9, 0x53, 0xa7 +.byte 0x4e, 0x9d, 0x3b, 0x77, 0xee, 0xdd, 0xbb, 0x76 +.byte 0xec, 0xd9, 0xb3, 0x67, 0xcf, 0x9e, 0x3d, 0x7b +.byte 0xf7, 0xef, 0xdf, 0xbf, 0x7e, 0xfd, 0xfa, 0xf4 +.byte 0xe9, 0xd3, 0xa6, 0x4c, 
0x99, 0x33, 0x66, 0xcd +.byte 0x9a, 0x35, 0x6a, 0xd4, 0xa8, 0x51, 0xa3, 0x46 +.byte 0x8c, 0x18, 0x30, 0x60, 0xc1, 0x83, 0x07, 0x0e +.byte 0x1d, 0x3a, 0x75, 0xea, 0xd5, 0xaa, 0x55, 0xab +.byte 0x57, 0xaf, 0x5f, 0xbe, 0x7c, 0xf9, 0xf2, 0xe5 +.byte 0xca, 0x94, 0x28, 0x50, 0xa1, 0x42, 0x84, 0x09 +.byte 0x13, 0x27, 0x4f, 0x9f, 0x3f, 0x7f, 0xff, 0xfe +.byte 0xfc, 0xf8, 0xf0, 0xe1, 0xc2, 0x85, 0x0b, 0x17 +.byte 0x2f, 0x5e, 0xbc, 0x78, 0xf1, 0xe3, 0xc6, 0x8d +.byte 0x1a, 0x34, 0x68, 0xd0, 0xa0, 0x40, 0x80, 0x00 +#elif defined(CRYPTO_AEAD) +RC_LFSR7: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03 +.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a +.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c +.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b +.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a +.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f +.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43 +.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09 +.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36 +.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37 +.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31 +.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25 +.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c +.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a +.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f +.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00 +#else +RC_LFSR8: +.byte 0x01, 0x02, 0x04, 0x08, 0x11, 0x23, 0x47, 0x8e +.byte 0x1c, 0x38, 0x71, 0xe2, 0xc4, 0x89, 0x12, 0x25 +.byte 0x4b, 0x97, 0x2e, 0x5c, 0xb8, 0x70, 0xe0, 0xc0 +.byte 0x81, 0x03, 0x06, 0x0c, 0x19, 0x32, 0x64, 0xc9 +.byte 0x92, 0x24, 0x49, 0x93, 0x26, 0x4d, 0x9b, 0x37 +.byte 0x6e, 0xdc, 0xb9, 0x72, 0xe4, 0xc8, 0x90, 0x20 +.byte 0x41, 0x82, 0x05, 0x0a, 0x15, 0x2b, 0x56, 0xad +.byte 0x5b, 0xb6, 0x6d, 0xda, 0xb5, 0x6b, 0xd6, 0xac +.byte 0x59, 0xb2, 0x65, 0xcb, 0x96, 0x2c, 0x58, 0xb0 +.byte 0x61, 0xc3, 0x87, 0x0f, 0x1f, 0x3e, 0x7d, 0xfb +.byte 0xf6, 0xed, 0xdb, 
0xb7, 0x6f, 0xde, 0xbd, 0x7a +.byte 0xf5, 0xeb, 0xd7, 0xae, 0x5d, 0xba, 0x74, 0xe8 +.byte 0xd1, 0xa2, 0x44, 0x88, 0x10, 0x21, 0x43, 0x86 +.byte 0x0d, 0x1b, 0x36, 0x6c, 0xd8, 0xb1, 0x63, 0xc7 +.byte 0x8f, 0x1e, 0x3c, 0x79, 0xf3, 0xe7, 0xce, 0x9c +.byte 0x39, 0x73, 0xe6, 0xcc, 0x98, 0x31, 0x62, 0xc5 +.byte 0x8b, 0x16, 0x2d, 0x5a, 0xb4, 0x69, 0xd2, 0xa4 +.byte 0x48, 0x91, 0x22, 0x45, 0x8a, 0x14, 0x29, 0x52 +.byte 0xa5, 0x4a, 0x95, 0x2a, 0x54, 0xa9, 0x53, 0xa7 +.byte 0x4e, 0x9d, 0x3b, 0x77, 0xee, 0xdd, 0xbb, 0x76 +.byte 0xec, 0xd9, 0xb3, 0x67, 0xcf, 0x9e, 0x3d, 0x7b +.byte 0xf7, 0xef, 0xdf, 0xbf, 0x7e, 0xfd, 0xfa, 0xf4 +.byte 0xe9, 0xd3, 0xa6, 0x4c, 0x99, 0x33, 0x66, 0xcd +.byte 0x9a, 0x35, 0x6a, 0xd4, 0xa8, 0x51, 0xa3, 0x46 +.byte 0x8c, 0x18, 0x30, 0x60, 0xc1, 0x83, 0x07, 0x0e +.byte 0x1d, 0x3a, 0x75, 0xea, 0xd5, 0xaa, 0x55, 0xab +.byte 0x57, 0xaf, 0x5f, 0xbe, 0x7c, 0xf9, 0xf2, 0xe5 +.byte 0xca, 0x94, 0x28, 0x50, 0xa1, 0x42, 0x84, 0x09 +.byte 0x13, 0x27, 0x4f, 0x9f, 0x3f, 0x7f, 0xff, 0xfe +.byte 0xfc, 0xf8, 0xf0, 0xe1, 0xc2, 0x85, 0x0b, 0x17 +.byte 0x2f, 0x5e, 0xbc, 0x78, 0xf1, 0xe3, 0xc6, 0x8d +.byte 0x1a, 0x34, 0x68, 0xd0, 0xa0, 0x40, 0x80, 0x00 +#endif \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot256v1/avr8_speed/permutation.h b/knot/Implementations/crypto_hash/knot256v1/avr8_speed/permutation.h new file mode 100644 index 0000000..e6c9793 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v1/avr8_speed/permutation.h @@ -0,0 +1,45 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.1 2020 by KNOT Team * +; ********************************************** +; + +; +; ============================================ +; R E G I S T E R D E F I N I T I O N S +; ============================================ +; + +#define mclen r16 +#define radlen r17 
+#define tcnt r17 +#define tmp0 r20 +#define tmp1 r21 +#define cnt0 r22 +#define rn r23 +#define rate r24 + + +; AEDH = 0b000: for authenticate AD +; AEDH = 0b001: for encryption +; AEDH = 0b011: for decryption +; AEDH = 0b100: for hash +#define AEDH r25 +#define rcnt r26 + +#if (STATE_INBITS==256) +#include "knot256.h" +#elif (STATE_INBITS==384) +#include "knot384.h" +#elif (STATE_INBITS==512) +#include "knot512.h" +#else +#error "Not specified key size and state size" +#endif + + diff --git a/knot/Implementations/crypto_hash/knot256v2/avr8_speed/api.h b/knot/Implementations/crypto_hash/knot256v2/avr8_speed/api.h new file mode 100644 index 0000000..cb530c7 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v2/avr8_speed/api.h @@ -0,0 +1 @@ +#define CRYPTO_BYTES 32 \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot256v2/avr8_speed/assist.h b/knot/Implementations/crypto_hash/knot256v2/avr8_speed/assist.h new file mode 100644 index 0000000..f95a717 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v2/avr8_speed/assist.h @@ -0,0 +1,86 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.1 2020 by KNOT Team * +; ********************************************** +; +.macro PUSH_CONFLICT + push r16 + push r17 + push r18 + push r19 + + push r23 + push r24 + push r25 + push r26 + push r27 + push r28 + push r29 + push r30 + push r31 +.endm + +.macro POP_CONFLICT + pop r31 + pop r30 + pop r29 + pop r28 + pop r27 + pop r26 + pop r25 + pop r24 + pop r23 + + pop r19 + pop r18 + pop r17 + pop r16 +.endm + +.macro PUSH_ALL + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r28 + push r29 +.endm + +.macro POP_ALL + pop r29 + pop 
r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + clr r1 +.endm \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot256v2/avr8_speed/config.h b/knot/Implementations/crypto_hash/knot256v2/avr8_speed/config.h new file mode 100644 index 0000000..f6fb82b --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v2/avr8_speed/config.h @@ -0,0 +1,131 @@ +#ifndef __CONFIG_H__ +#define __CONFIG_H__ + +//#define CRYPTO_AEAD +#define CRYPTO_HASH + +#define MAX_MESSAGE_LENGTH 128 + +#define STATE_INBITS 384 +/* For CRYPTO_AEAD */ +#define CRYPTO_KEYBITS 128 +/* For CRYPTO_HASH */ +#define CRYPTO_BITS 256 + +#define STATE_INBYTES ((STATE_INBITS + 7) / 8) +#define ROW_INBITS ((STATE_INBITS + 3) / 4) +#define ROW_INBYTES ((ROW_INBITS + 7) / 8) + +/* For CRYPTO_AEAD */ +#define CRYPTO_KEYBYTES ((CRYPTO_KEYBITS + 7) / 8) +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES CRYPTO_KEYBYTES +#define CRYPTO_ABYTES CRYPTO_KEYBYTES +#define CRYPTO_NOOVERLAP 1 + +#define MAX_ASSOCIATED_DATA_LENGTH 32 +#define MAX_CIPHER_LENGTH (MAX_MESSAGE_LENGTH + CRYPTO_ABYTES) + +#define TAG_MATCH 0 +#define TAG_UNMATCH -1 +#define OTHER_FAILURES -2 + +/* For CRYPTO_HASH */ +#define CRYPTO_BYTES ((CRYPTO_BITS + 7) / 8) + + + +#define DOMAIN_BITS 0x80 +#define PAD_BITS 0x01 +#define S384_R192_BITS 0x80 + +#if (STATE_INBITS==256) +#define C1 1 +#define C2 8 +#define C3 25 +#elif (STATE_INBITS==384) +#define C1 1 +#define C2 8 +#define C3 55 +#elif (STATE_INBITS==512) +#define C1 1 +#define C2 16 +#define C3 25 +#else +#error "Not specified state size" +#endif + +#ifdef CRYPTO_AEAD +/* For CRYPTO_AEAD */ +#define KEY_INBITS (CRYPTO_KEYBYTES * 8) +#define KEY_INBYTES (CRYPTO_KEYBYTES) + +#define NONCE_INBITS (CRYPTO_NPUBBYTES * 8) +#define NONCE_INBYTES (CRYPTO_NPUBBYTES) + +#define TAG_INBITS (CRYPTO_ABYTES * 8) +#define TAG_INBYTES (CRYPTO_ABYTES) + +#if 
(KEY_INBITS==128) && (STATE_INBITS==256) +#define RATE_INBITS 64 +#define NR_0 52 +#define NR_i 28 +#define NR_f 32 +#elif (KEY_INBITS==128) && (STATE_INBITS==384) +#define RATE_INBITS 192 +#define NR_0 76 +#define NR_i 28 +#define NR_f 32 +#elif (KEY_INBITS==192) && (STATE_INBITS==384) +#define RATE_INBITS 96 +#define NR_0 76 +#define NR_i 40 +#define NR_f 44 +#elif (KEY_INBITS==256) && (STATE_INBITS==512) +#define RATE_INBITS 128 +#define NR_0 100 +#define NR_i 52 +#define NR_f 56 +#else +#error "Not specified key size and state size" +#endif + +#define RATE_INBYTES ((RATE_INBITS + 7) / 8) +#define SQUEEZE_RATE_INBYTES TAG_INBYTES + +#endif + +#ifdef CRYPTO_HASH +/* For CRYPTO_HASH */ +#define HASH_DIGEST_INBITS (CRYPTO_BYTES * 8) + +#if (HASH_DIGEST_INBITS==256) && (STATE_INBITS==256) +#define HASH_RATE_INBITS 32 +#define HASH_SQUEEZE_RATE_INBITS 128 +#define NR_h 68 +#elif (HASH_DIGEST_INBITS==256) && (STATE_INBITS==384) +#define HASH_RATE_INBITS 128 +#define HASH_SQUEEZE_RATE_INBITS 128 +#define NR_h 80 +#elif (HASH_DIGEST_INBITS==384) && (STATE_INBITS==384) +#define HASH_RATE_INBITS 48 +#define HASH_SQUEEZE_RATE_INBITS 192 +#define NR_h 104 +#elif (HASH_DIGEST_INBITS==512) && (STATE_INBITS==512) +#define HASH_RATE_INBITS 64 +#define HASH_SQUEEZE_RATE_INBITS 256 +#define NR_h 140 +#else +#error "Not specified hash digest size and state size" +#endif + +#define HASH_RATE_INBYTES ((HASH_RATE_INBITS + 7) / 8) +#define HASH_SQUEEZE_RATE_INBYTES ((HASH_SQUEEZE_RATE_INBITS + 7) / 8) + +#endif + +#define TAG_MATCH 0 +#define TAG_UNMATCH -1 +#define OTHER_FAILURES -2 + +#endif \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot256v2/avr8_speed/crypto_hash.h b/knot/Implementations/crypto_hash/knot256v2/avr8_speed/crypto_hash.h new file mode 100644 index 0000000..342a639 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v2/avr8_speed/crypto_hash.h @@ -0,0 +1,13 @@ +#ifdef __cplusplus +extern "C" { +#endif + +int crypto_hash( + 
unsigned char *out, + const unsigned char *in, + unsigned long long inlen + ); + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot256v2/avr8_speed/encrypt.c b/knot/Implementations/crypto_hash/knot256v2/avr8_speed/encrypt.c new file mode 100644 index 0000000..baf0a3b --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v2/avr8_speed/encrypt.c @@ -0,0 +1,106 @@ +#include +#include +#include +#include +#include "config.h" + +extern void crypto_aead_encrypt_asm( + unsigned char *c, + const unsigned char *m, + unsigned char mlen, + const unsigned char *ad, + unsigned char adlen, + const unsigned char *npub, + const unsigned char *k + ); + +extern int crypto_aead_decrypt_asm( + unsigned char *m, + const unsigned char *c, + unsigned char clen, + const unsigned char *ad, + unsigned char adlen, + const unsigned char *npub, + const unsigned char *k + ); + +extern void crypto_hash_asm( + unsigned char *out, + const unsigned char *in, + unsigned char inlen + ); + + +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k + ) +{ + /* + ... + ... the code for the cipher implementation goes here, + ... generating a ciphertext c[0],c[1],...,c[*clen-1] + ... from a plaintext m[0],m[1],...,m[mlen-1] + ... and associated data ad[0],ad[1],...,ad[adlen-1] + ... and nonce npub[0],npub[1],.. + ... and secret key k[0],k[1],... + ... the implementation shall not use nsec + ... + ... 
return 0; + */ + + (void)nsec; + + crypto_aead_encrypt_asm(c, m, mlen, ad, adlen, npub, k); + + *clen = mlen + TAG_INBYTES; + return 0; +} + + + +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k + ) +{ + /* + ... + ... the code for the AEAD implementation goes here, + ... generating a plaintext m[0],m[1],...,m[*mlen-1] + ... and secret message number nsec[0],nsec[1],... + ... from a ciphertext c[0],c[1],...,c[clen-1] + ... and associated data ad[0],ad[1],...,ad[adlen-1] + ... and nonce number npub[0],npub[1],... + ... and secret key k[0],k[1],... + ... + ... return 0; + */ + unsigned long long mlen_; + unsigned char tag_is_match; + + (void)nsec; + if (clen < CRYPTO_ABYTES) { + return -1; + } + mlen_ = clen - CRYPTO_ABYTES; + + tag_is_match = crypto_aead_decrypt_asm(m, c, mlen_, ad, adlen, npub, k); + + if (tag_is_match != 0) + { + memset(m, 0, (size_t)mlen_); + return -1; + } + + *mlen = mlen_; + return 0; +} \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot256v2/avr8_speed/encrypt_core.S b/knot/Implementations/crypto_hash/knot256v2/avr8_speed/encrypt_core.S new file mode 100644 index 0000000..bd74f93 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v2/avr8_speed/encrypt_core.S @@ -0,0 +1,555 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.1 2020 by KNOT Team * +; ********************************************** +; + +; +; ============================================ +; S R A M D E F I N I T I O N S +; ============================================ +; +#include +#include "config.h" + +.section .noinit + SRAM_STATE: .BYTE 0, 0, 0, 0, 0, 0, 
0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#if (STATE_INBYTES > 32) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif +#if (STATE_INBYTES > 48) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif + SRAM_MESSAGE_OUT_ADDR: .BYTE 0, 0 + SRAM_MESSAGE_IN_ADDR: .BYTE 0, 0 + SRAM_MESSAGE_IN_LEN: .BYTE 0, 0 +#ifdef CRYPTO_AEAD +; For CRYPTO_AEAD + SRAM_ASSOCIATED_DATA_ADDR: .BYTE 0, 0 + SRAM_ADLEN: .BYTE 0, 0 + SRAM_NONCE_ADDR: .BYTE 0, 0 + SRAM_KEY_ADDR: .BYTE 0, 0 + + SRAM_ADDITIONAL: + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#if (CRYPTO_ABYTES > 16) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif +#if (CRYPTO_ABYTES > 24) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif + +#endif + +.section .text + +#include "permutation.h" + +; require YH:YL be the address of the current associated data/cipher/message block +; for enc and dec, store ciphertext or plaintext +; require ZH:ZL be the address of the current cipher/message block +.macro XOR_to_State_ENCDEC + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + mov cnt0, rate +XOR_to_State_loop_ENCDEC: + ld tmp0, Y+ ; plaintext/ciphertext + ld tmp1, X ; state + eor tmp1, tmp0 ; ciphertext/plaintext + st Z+, tmp1 ; store ciphertext/plaintext + sbrc AEDH, 1 ; test auth/enc or dec, if AEDH[1] == 0, skip repalce state byte + mov tmp1, tmp0 ; if dec, replace state + st X+, tmp1 ; store state byte + dec cnt0 + brne XOR_to_State_loop_ENCDEC +; YH:YL are now the address of the next associated data block +.endm + +; require YH:YL be the address of the current associated data/cipher/message block +; for enc and dec, store ciphertext or plaintext +; require ZH:ZL be the address of the current cipher/message block +.macro XOR_to_State_AUTH + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + mov cnt0, rate +XOR_to_State_loop_AUTH: + ld tmp0, Y+ ; plaintext/ciphertext + ld tmp1, X ; state + eor tmp1, tmp0 ; ciphertext/plaintext + st X+, tmp1 ; 
store state byte + dec cnt0 + brne XOR_to_State_loop_AUTH +; YH:YL are now the address of the next associated data block +.endm + + + +; require YH:YL pointed to the input data +; require ZH:ZL pointed to the output data +; require cnt0 containes the nubmer of bytes in source data +; require number of bytes in source data less than rate, i.e., 0 <= cnt0 < rate +; +; the 0th bit in AEDH is used to distinguish (auth AD) or (enc/dec M/C): +; AEDH[0] = 0 for (auth AD), AEDH[0] = 1 for (enc/dec M/C) +; the 1th bit in AEDH is used to distinguish (auth AD/enc M) or (dec C): +; AEDH[1] = 0 for (auth AD/enc M), AEDH[1] = 1 for (dec C) +; AEDH = 0b000 for (auth AD) +; AEDH = 0b001 for (enc M) +; AEDH = 0b011 for (dec C) +Pad_XOR_to_State: + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + tst cnt0 + breq XOR_padded_data +XOR_source_data_loop: + ld tmp0, Y+ ; plaintext/ciphertext + ld tmp1, X ; state + eor tmp1, tmp0 ; ciphertext/plaintext + sbrc AEDH, 0 ; test auth or enc/dec, if AEDH[0] == 0, skip store result + st Z+, tmp1 ; store ciphertext/plaintext + sbrc AEDH, 1 ; test auth/enc or dec, if AEDH[1] == 0, skip repalce state byte + mov tmp1, tmp0 ; if dec, replace state + st X+, tmp1 ; store state byte + dec cnt0 + brne XOR_source_data_loop +XOR_padded_data: + ldi tmp0, PAD_BITS + ld tmp1, X + eor tmp1, tmp0 + st X, tmp1 +ret + +AddDomain: + ldi XH, hi8(SRAM_STATE + STATE_INBYTES - 1) + ldi XL, lo8(SRAM_STATE + STATE_INBYTES - 1) + ldi tmp0, DOMAIN_BITS + ld tmp1, X + eor tmp0, tmp1 + st X, tmp0 +ret + +; require ZH:ZL be the address of the destination +EXTRACT_from_State: + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + mov tmp1, rate +EXTRACT_from_State_loop: + ld tmp0, X+ + st Z+, tmp0 + dec tmp1 + brne EXTRACT_from_State_loop +ret + +AUTH: + tst radlen + breq AUTH_end + + cp radlen, rate + brlo auth_ad_padded_block + +auth_ad_loop: + XOR_to_State_AUTH + rcall Permutation + sub radlen, rate + cp radlen, rate + brlo auth_ad_padded_block + rjmp auth_ad_loop + 
+auth_ad_padded_block: + mov cnt0, radlen + rcall Pad_XOR_to_State + rcall Permutation + +AUTH_end: +ret + +#ifdef CRYPTO_AEAD +Initialization: + ldi rn, NR_0 + ldi XL, lo8(SRAM_STATE) + ldi XH, hi8(SRAM_STATE) + + lds YH, SRAM_NONCE_ADDR + lds YL, SRAM_NONCE_ADDR + 1 + ldi cnt0, CRYPTO_NPUBBYTES +load_nonce_loop: + ld tmp0, Y+ + st X+, tmp0 + dec cnt0 + brne load_nonce_loop + + lds YH, SRAM_KEY_ADDR + lds YL, SRAM_KEY_ADDR + 1 + ldi cnt0, CRYPTO_KEYBYTES +load_key_loop: + ld tmp0, Y+ + st X+, tmp0 + dec cnt0 + brne load_key_loop + +#if (STATE_INBITS==384) && (RATE_INBITS==192) + ldi cnt0, (STATE_INBYTES - CRYPTO_NPUBBYTES - CRYPTO_KEYBYTES - 1) + clr tmp0 +empty_state_loop: + st X+, tmp0 + dec cnt0 + brne empty_state_loop + ldi tmp0, S384_R192_BITS + st X+, tmp0 +#endif + + rcall Permutation +ret + +ENC: + tst mclen + breq ENC_end + + cp mclen, rate + brlo enc_padded_block + +enc_loop: + XOR_to_State_ENCDEC + ldi rn, NR_i + rcall Permutation + sub mclen, rate + cp mclen, rate + brlo enc_padded_block + rjmp enc_loop + +enc_padded_block: + mov cnt0, mclen + rcall Pad_XOR_to_State +ENC_end: +ret + +Finalization: + ldi rate, SQUEEZE_RATE_INBYTES + ldi rn, NR_f + rcall Permutation + rcall EXTRACT_from_State +ret + +; void crypto_aead_encrypt_asm( +; unsigned char *c, +; const unsigned char *m, +; unsigned long long mlen, +; const unsigned char *ad, +; unsigned long long adlen, +; const unsigned char *npub, +; const unsigned char *k +; ) +; +; unsigned char *c, is passed in r24:r25 +; const unsigned char *m, is passed in r22:r23 +; unsigned long long mlen, is passed in r20:r21, only LSB (r20) is used +; const unsigned char *ad, is passed in r18:r19 +; unsigned long long adlen, is passed in r16:r17, only LSB (r16) is used +; const unsigned char *npub, is passed in r14:r15 +; const unsigned char *k is passed in r12:r13 +.global crypto_aead_encrypt_asm +crypto_aead_encrypt_asm: + PUSH_ALL + ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR) + ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR) + st X+, 
r25 ;store cipher address in SRAM_MESSAGE_OUT_ADDR + st X+, r24 + st X+, r23 ;store message address in SRAM_MESSAGE_IN_ADDR + st X+, r22 + st X+, r21 ;store message length in SRAM_MESSAGE_IN_LEN + st X+, r20 + st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR + st X+, r18 + st X+, r17 ;store associated data length in SRAM_ADLEN + st X+, r16 + st X+, r15 ;store nonce address in SRAM_NONCE_ADDR + st X+, r14 + st X+, r13 ;store key address in SRAM_KEY_ADDR + st X+, r12 + mov radlen, r16 + mov mclen, r20 + + rcall Initialization + + ldi rn, NR_i + ldi rate, RATE_INBYTES + ldi AEDH, 0b000 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C) + lds YH, SRAM_ASSOCIATED_DATA_ADDR + lds YL, SRAM_ASSOCIATED_DATA_ADDR + 1 + rcall AUTH + rcall AddDomain + ldi AEDH, 0b001 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C) + lds YH, SRAM_MESSAGE_IN_ADDR + lds YL, SRAM_MESSAGE_IN_ADDR + 1 + lds ZH, SRAM_MESSAGE_OUT_ADDR + lds ZL, SRAM_MESSAGE_OUT_ADDR + 1 + rcall ENC + rcall Finalization + POP_ALL +ret + +; int crypto_aead_decrypt_asm( +; unsigned char *m, +; const unsigned char *c, +; unsigned long long clen, +; const unsigned char *ad, +; unsigned long long adlen, +; const unsigned char *npub, +; const unsigned char *k +; ) +; +; unsigned char *m, is passed in r24:r25 +; const unsigned char *c, is passed in r22:r23 +; unsigned long long clen, is passed in r20:r21, only LSB (r20) is used +; const unsigned char *ad, is passed in r18:r19 +; unsigned long long adlen, is passed in r16:r17, only LSB (r16) is used +; const unsigned char *npub, is passed in r14:r15 +; const unsigned char *k is passed in r12:r13 +.global crypto_aead_decrypt_asm +crypto_aead_decrypt_asm: + PUSH_ALL + ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR) + ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR) + st X+, r25 ;store message address in SRAM_MESSAGE_OUT_ADDR + st X+, r24 + st X+, r23 ;store cipher address in SRAM_MESSAGE_IN_ADDR + st X+, r22 + st X+, r21 
;store cipher length in SRAM_MESSAGE_IN_LEN + st X+, r20 + st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR + st X+, r18 + st X+, r17 ;store associated data length in SRAM_ADLEN + st X+, r16 + st X+, r15 ;store nonce address in SRAM_NONCE_ADDR + st X+, r14 + st X+, r13 ;store key address in SRAM_KEY_ADDR + st X+, r12 + mov radlen, r16 + mov mclen, r20 + + rcall Initialization + + ldi rn, NR_i + ldi rate, RATE_INBYTES + ldi AEDH, 0b000 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C) + lds YH, SRAM_ASSOCIATED_DATA_ADDR + lds YL, SRAM_ASSOCIATED_DATA_ADDR + 1 + rcall AUTH + rcall AddDomain + ldi AEDH, 0b011 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C) + lds YH, SRAM_MESSAGE_IN_ADDR + lds YL, SRAM_MESSAGE_IN_ADDR + 1 + lds ZH, SRAM_MESSAGE_OUT_ADDR + lds ZL, SRAM_MESSAGE_OUT_ADDR + 1 + rcall ENC + + ldi ZH, hi8(SRAM_ADDITIONAL) + ldi ZL, lo8(SRAM_ADDITIONAL) + rcall Finalization + + sbiw ZL, CRYPTO_ABYTES + ldi cnt0, CRYPTO_ABYTES +compare_tag: + ld tmp0, Z+ + ld tmp1, Y+ + cp tmp0, tmp1 + brne return_tag_not_match + dec cnt0 + brne compare_tag + rjmp return_tag_match + +return_tag_not_match: + ldi r25, 0xFF + ldi r24, 0xFF + rjmp crypto_aead_decrypt_end + +return_tag_match: + clr r25 + clr r24 +crypto_aead_decrypt_end: + POP_ALL +ret + +; #ifdef CRYPTO_AEAD +#endif + + +#ifdef CRYPTO_HASH + +; void crypto_hash_asm( +; unsigned char *out, +; const unsigned char *in, +; unsigned long long inlen +; ) +; +; unsigned char *out, is passed in r24:r25 +; const unsigned char *in, is passed in r22:r23 +; unsigned long long inlen, is passed in r20:r21, only LSB (r20) is used +.global crypto_hash_asm +crypto_hash_asm: + PUSH_ALL + ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR) + ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR) + st X+, r25 ;store message address in SRAM_MESSAGE_OUT_ADDR + st X+, r24 + st X+, r23 ;store cipher address in SRAM_MESSAGE_IN_ADDR + st X+, r22 + st X+, r21 ;store cipher length in 
SRAM_MESSAGE_IN_LEN + st X+, r20 + mov mclen, r20 + + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) +#if (STATE_INBITS==384) && (HASH_RATE_INBITS==128) + ldi cnt0, STATE_INBYTES - 1 +#else + ldi cnt0, STATE_INBYTES +#endif + clr tmp0 +zero_state: + st X+, tmp0 + dec cnt0 + brne zero_state + +#if (STATE_INBITS==384) && (HASH_RATE_INBITS==128) + ldi tmp0, S384_R192_BITS + st X+, tmp0 +#endif + + ldi rn, NR_h + ldi AEDH, 0b100 + +HASH_ABSORBING: + mov radlen, mclen + tst radlen + breq EMPTY_M + ldi rate, HASH_RATE_INBYTES + lds YH, SRAM_MESSAGE_IN_ADDR + lds YL, SRAM_MESSAGE_IN_ADDR + 1 + rcall AUTH + rjmp HASH_SQUEEZING + +EMPTY_M: + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + ldi tmp0, PAD_BITS + ld tmp1, X + eor tmp1, tmp0 + st X, tmp1 + rcall Permutation + +HASH_SQUEEZING: + ldi rate, HASH_SQUEEZE_RATE_INBYTES + lds ZH, SRAM_MESSAGE_OUT_ADDR + lds ZL, SRAM_MESSAGE_OUT_ADDR + 1 + ldi tcnt, CRYPTO_BYTES +SQUEEZING_loop: + rcall EXTRACT_from_State + subi tcnt, HASH_SQUEEZE_RATE_INBYTES + breq HASH_SQUEEZING_end + rcall Permutation + rjmp SQUEEZING_loop +HASH_SQUEEZING_end: + POP_ALL +ret + +#endif + + +; Byte Order In AVR 8: +; KNOT-AEAD(128, 256, 64): +; N[ 0] AEAD_State[ 0] | Message[ 0] Perm_row_0[0] 0 Tag[ 0] +; N[ 1] AEAD_State[ 1] | Message[ 1] Perm_row_0[1] 0 Tag[ 1] +; N[ 2] AEAD_State[ 2] | Message[ 2] Perm_row_0[2] 0 Tag[ 2] +; N[ 3] AEAD_State[ 3] | Message[ 3] Perm_row_0[3] 0 Tag[ 3] +; N[ 4] AEAD_State[ 4] | Message[ 4] 0x01 Perm_row_0[4] 0 Tag[ 4] +; N[ 5] AEAD_State[ 5] | Message[ 5] 0x00 Perm_row_0[5] 0 Tag[ 5] +; N[ 6] AEAD_State[ 6] | Message[ 6] 0x00 Perm_row_0[6] 0 Tag[ 6] +; N[ 7] AEAD_State[ 7] | Message[ 7] 0x00 Perm_row_0[7] <<< 0 Tag[ 7] +; N[ 8] AEAD_State[ 8] | Perm_row_1[0] 1 +; N[ 9] AEAD_State[ 9] | Perm_row_1[1] 1 +; N[10] AEAD_State[10] | Perm_row_1[2] 1 +; N[11] AEAD_State[11] | Perm_row_1[3] 1 +; N[12] AEAD_State[12] | Perm_row_1[4] 1 +; N[13] AEAD_State[13] | Perm_row_1[5] 1 +; N[14] AEAD_State[14] | Perm_row_1[6] 1 +; 
N[15] AEAD_State[15] | Perm_row_1[7] <<< 1 +; K[ 0] AEAD_State[16] | Perm_row_2[0] 8 +; K[ 1] AEAD_State[17] | Perm_row_2[1] 8 +; K[ 2] AEAD_State[18] | Perm_row_2[2] 8 +; K[ 3] AEAD_State[19] | Perm_row_2[3] 8 +; K[ 4] AEAD_State[20] | Perm_row_2[4] 8 +; K[ 5] AEAD_State[21] | Perm_row_2[5] 8 +; K[ 6] AEAD_State[22] | Perm_row_2[6] 8 +; K[ 7] AEAD_State[23] | Perm_row_2[7] <<< 8 +; K[ 8] AEAD_State[24] | Perm_row_3[0] 25 +; K[ 9] AEAD_State[25] | Perm_row_3[1] 25 +; K[10] AEAD_State[26] | Perm_row_3[2] 25 +; K[11] AEAD_State[27] | Perm_row_3[3] 25 +; K[12] AEAD_State[28] | Perm_row_3[4] 25 +; K[13] AEAD_State[29] | Perm_row_3[5] 25 +; K[14] AEAD_State[30] | Perm_row_3[6] 25 +; K[15] AEAD_State[31] | ^0x80 Perm_row_3[7] <<< 25 +; +; +; KNOT-AEAD(128, 384, 192): +; Initalization +; N[ 0] AEAD_State[ 0] | Message[ 0] Perm_row_0[ 0] 0 Tag[ 0] +; N[ 1] AEAD_State[ 1] | Message[ 1] Perm_row_0[ 1] 0 Tag[ 1] +; N[ 2] AEAD_State[ 2] | Message[ 2] Perm_row_0[ 2] 0 Tag[ 2] +; N[ 3] AEAD_State[ 3] | Message[ 3] Perm_row_0[ 3] 0 Tag[ 3] +; N[ 4] AEAD_State[ 4] | Message[ 4] 0x01 Perm_row_0[ 4] 0 Tag[ 4] +; N[ 5] AEAD_State[ 5] | Message[ 5] 0x00 Perm_row_0[ 5] 0 Tag[ 5] +; N[ 6] AEAD_State[ 6] | Message[ 6] 0x00 Perm_row_0[ 6] 0 Tag[ 6] +; N[ 7] AEAD_State[ 7] | Message[ 7] 0x00 Perm_row_0[ 7] 0 Tag[ 7] +; N[ 8] AEAD_State[ 8] | Message[ 8] 0x00 Perm_row_0[ 8] 0 Tag[ 8] +; N[ 9] AEAD_State[ 9] | Message[ 9] 0x00 Perm_row_0[ 9] 0 Tag[ 9] +; N[10] AEAD_State[10] | Message[10] 0x00 Perm_row_0[10] 0 Tag[10] +; N[11] AEAD_State[11] | Message[11] 0x00 Perm_row_0[11] <<< 0 Tag[11] +; N[12] AEAD_State[12] | Message[12] 0x00 Perm_row_1[ 0] 1 Tag[12] +; N[13] AEAD_State[13] | Message[13] 0x00 Perm_row_1[ 1] 1 Tag[13] +; N[14] AEAD_State[14] | Message[14] 0x00 Perm_row_1[ 2] 1 Tag[14] +; N[15] AEAD_State[15] | Message[15] 0x00 Perm_row_1[ 3] 1 Tag[15] +; K[ 0] AEAD_State[16] | Message[16] 0x00 Perm_row_1[ 4] 1 +; K[ 1] AEAD_State[17] | Message[17] 0x00 Perm_row_1[ 5] 1 +; K[ 2] 
AEAD_State[18] | Message[18] 0x00 Perm_row_1[ 6] 1 +; K[ 3] AEAD_State[19] | Message[19] 0x00 Perm_row_1[ 7] 1 +; K[ 4] AEAD_State[20] | Message[20] 0x00 Perm_row_1[ 8] 1 +; K[ 5] AEAD_State[21] | Message[21] 0x00 Perm_row_1[ 9] 1 +; K[ 6] AEAD_State[22] | Message[22] 0x00 Perm_row_1[10] 1 +; K[ 7] AEAD_State[23] | Message[23] 0x00 Perm_row_1[11] <<< 1 +; K[ 8] AEAD_State[24] | Perm_row_2[ 0] 8 +; K[ 9] AEAD_State[25] | Perm_row_2[ 1] 8 +; K[10] AEAD_State[26] | Perm_row_2[ 2] 8 +; K[11] AEAD_State[27] | Perm_row_2[ 3] 8 +; K[12] AEAD_State[28] | Perm_row_2[ 4] 8 +; K[13] AEAD_State[29] | Perm_row_2[ 5] 8 +; K[14] AEAD_State[30] | Perm_row_2[ 6] 8 +; K[15] AEAD_State[31] | Perm_row_2[ 7] 8 +; 0x00 AEAD_State[32] | Perm_row_2[ 8] 8 +; 0x00 AEAD_State[33] | Perm_row_2[ 9] 8 +; 0x00 AEAD_State[34] | Perm_row_2[10] 8 +; 0x00 AEAD_State[35] | Perm_row_2[11] <<< 8 +; 0x00 AEAD_State[36] | Perm_row_3[ 0] 55 +; 0x00 AEAD_State[37] | Perm_row_3[ 1] 55 +; 0x00 AEAD_State[38] | Perm_row_3[ 2] 55 +; 0x00 AEAD_State[39] | Perm_row_3[ 3] 55 +; 0x00 AEAD_State[40] | Perm_row_3[ 4] 55 +; 0x00 AEAD_State[41] | Perm_row_3[ 5] 55 +; 0x00 AEAD_State[42] | Perm_row_3[ 6] 55 +; 0x00 AEAD_State[43] | Perm_row_3[ 7] 55 +; 0x00 AEAD_State[44] | Perm_row_3[ 8] 55 +; 0x00 AEAD_State[45] | Perm_row_3[ 9] 55 +; 0x00 AEAD_State[46] | Perm_row_3[10] 55 +; 0x00 ^0x80 AEAD_State[47] | ^0x80 Perm_row_3[11] <<< 55 diff --git a/knot/Implementations/crypto_hash/knot256v2/avr8_speed/hash.c b/knot/Implementations/crypto_hash/knot256v2/avr8_speed/hash.c new file mode 100644 index 0000000..dbbe4df --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v2/avr8_speed/hash.c @@ -0,0 +1,32 @@ +#include +#include +#include +#include +#include "api.h" +#include "crypto_hash.h" + +extern void crypto_hash_asm( + unsigned char *out, + const unsigned char *in, + unsigned char inlen + ); + +int crypto_hash( + unsigned char *out, + const unsigned char *in, + unsigned long long inlen +) +{ + /* + ... + ... 
the code for the hash function implementation goes here + ... generating a hash value out[0],out[1],...,out[CRYPTO_BYTES-1] + ... from a message in[0],in[1],...,in[in-1] + ... + ... return 0; + */ + + crypto_hash_asm(out, in, inlen); + + return 0; +} \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot256v2/avr8_speed/knot256.h b/knot/Implementations/crypto_hash/knot256v2/avr8_speed/knot256.h new file mode 100644 index 0000000..f99f68b --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v2/avr8_speed/knot256.h @@ -0,0 +1,306 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.1 2020 by KNOT Team * +; ********************************************** +; +#define x10 r0 +#define x11 r1 +#define x12 r2 +#define x13 r3 +#define x14 r4 +#define x15 r5 +#define x16 r6 +#define x17 r7 + +; an intentionally arrangement of registers to facilitate movw +#define x20 r8 +#define x21 r10 +#define x22 r12 +#define x23 r14 +#define x24 r9 +#define x25 r11 +#define x26 r13 +#define x27 r15 + +; an intentionally arrangement of registers to facilitate movw +#define x30 r16 +#define x35 r18 +#define x32 r20 +#define x37 r22 +#define x34 r17 +#define x31 r19 +#define x36 r21 +#define x33 r23 + +#define t0j r24 +#define t1j r25 +#define x0j r27 + +#include "assist.h" + +.macro Sbox i0, i1, i2, i3 + mov t0j, \i1 + com \i0 + and \i1, \i0 + eor \i1, \i2 + or \i2, t0j + eor \i0, \i3 + eor \i2, \i0 + eor t0j, \i3 + and \i0, \i1 + eor \i3, \i1 + eor \i0, t0j + and t0j, \i2 + eor \i1, t0j +.endm + +Permutation: + PUSH_CONFLICT + mov rcnt, rn + + ldi YH, hi8(SRAM_STATE + ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + ROW_INBYTES) + ld x10, Y+ + ld x11, Y+ + ld x12, Y+ + ld x13, Y+ + ld x14, Y+ + ld x15, Y+ + ld x16, Y+ + ld x17, Y+ + ld x20, Y+ + ld x21, Y+ + ld x22, Y+ + ld x23, Y+ 
+ ld x24, Y+ + ld x25, Y+ + ld x26, Y+ + ld x27, Y+ + ld x30, Y+ + ld x31, Y+ + ld x32, Y+ + ld x33, Y+ + ld x34, Y+ + ld x35, Y+ + ld x36, Y+ + ld x37, Y+ + +#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) + sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[2] = 1 for HASH + rjmp For_Hash +For_AEAD: + ldi ZL, lo8(RC_LFSR6) + ldi ZH, hi8(RC_LFSR6) + rjmp round_loop_start +For_Hash: + ldi ZL, lo8(RC_LFSR7) + ldi ZH, hi8(RC_LFSR7) +#elif defined(CRYPTO_AEAD) + ldi ZL, lo8(RC_LFSR6) + ldi ZH, hi8(RC_LFSR6) +#else + ldi ZL, lo8(RC_LFSR7) + ldi ZH, hi8(RC_LFSR7) +#endif + +round_loop_start: + ; AddRC + lpm t0j, Z+ + ldi YH, hi8(SRAM_STATE) + ldi YL, lo8(SRAM_STATE) + ld x0j, Y + eor x0j, t0j + + ; SubColumns + Sbox x0j, x10, x20, x30 + st Y+, x0j + ld x0j, Y + Sbox x0j, x11, x21, x31 + st Y+, x0j + ld x0j, Y + Sbox x0j, x12, x22, x32 + st Y+, x0j + ld x0j, Y + Sbox x0j, x13, x23, x33 + st Y+, x0j + ld x0j, Y + Sbox x0j, x14, x24, x34 + st Y+, x0j + ld x0j, Y + Sbox x0j, x15, x25, x35 + st Y+, x0j + ld x0j, Y + Sbox x0j, x16, x26, x36 + st Y+, x0j + ld x0j, Y + Sbox x0j, x17, x27, x37 + st Y, x0j + + ; ShiftRows + ; <<< 1 + mov t0j, x17 + rol t0j + rol x10 + rol x11 + rol x12 + rol x13 + rol x14 + rol x15 + rol x16 + rol x17 + + ; <<< 8 + ; 7 6 5 4 3 2 1 0 => 6 5 4 3 2 1 0 7 + ;mov t0j, x27 + ;mov x27, x26 + ;mov x26, x25 + ;mov x25, x24 + ;mov x24, x23 + ;mov x23, x22 + ;mov x22, x21 + ;mov x21, x20 + ;mov x20, t0j + ; an intentionally arrangement of registers to facilitate movw + movw t0j, x23 ; t1j:t0j <= x27:x23 + movw x23, x22 ; x27:x23 <= x26:x22 + movw x22, x21 ; x26:x22 <= x25:x21 + movw x21, x20 ; x25:x21 <= x24:x20 + mov x20, t1j ; x20 <= t1j + mov x24, t0j ; x24 <= t0j + + ; <<< 1 + mov t0j, x37 + rol t0j + rol x30 + rol x31 + rol x32 + rol x33 + rol x34 + rol x35 + rol x36 + rol x37 + ; <<< 24 + ; 7 6 5 4 3 2 1 0 => 4 3 2 1 0 7 6 5 + ;mov t0j, x30 + ;mov x30, x35 + ;mov x35, x32 + ;mov x32, x37 + ;mov x37, x34 + ;mov x34, x31 + ;mov x31, x36 + ;mov x36, x33 + 
;mov x33, t0j + ; an intentionally arrangement of registers to facilitate movw + ;x30 r16 + ;x35 r18 + ;x32 r20 + ;x37 r22 + ;x34 r17 + ;x31 r19 + ;x36 r21 + ;x33 r23 + movw t0j, x30 ; t1j:t0j <= x34:x30 + movw x30, x35 ; x34:x30 <= x31:x35 + movw x35, x32 ; x31:x35 <= x36:x32 + movw x32, x37 ; x36:x32 <= x33:x37 + mov x37, t1j ; x37 <= x34 + mov x33, t0j ; x33 <= x30 + + dec rcnt + breq round_loop_end + jmp round_loop_start + +round_loop_end: + ldi YH, hi8(SRAM_STATE + ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + ROW_INBYTES) + st Y+, x10 + st Y+, x11 + st Y+, x12 + st Y+, x13 + st Y+, x14 + st Y+, x15 + st Y+, x16 + st Y+, x17 + st Y+, x20 + st Y+, x21 + st Y+, x22 + st Y+, x23 + st Y+, x24 + st Y+, x25 + st Y+, x26 + st Y+, x27 + st Y+, x30 + st Y+, x31 + st Y+, x32 + st Y+, x33 + st Y+, x34 + st Y+, x35 + st Y+, x36 + st Y+, x37 + + POP_CONFLICT +ret + + +.section .text +#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) +RC_LFSR6: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x21, 0x03, 0x06 +.byte 0x0c, 0x18, 0x31, 0x22, 0x05, 0x0a, 0x14, 0x29 +.byte 0x13, 0x27, 0x0f, 0x1e, 0x3d, 0x3a, 0x34, 0x28 +.byte 0x11, 0x23, 0x07, 0x0e, 0x1c, 0x39, 0x32, 0x24 +.byte 0x09, 0x12, 0x25, 0x0b, 0x16, 0x2d, 0x1b, 0x37 +.byte 0x2e, 0x1d, 0x3b, 0x36, 0x2c, 0x19, 0x33, 0x26 +.byte 0x0d, 0x1a, 0x35, 0x2a, 0x15, 0x2b, 0x17, 0x2f +.byte 0x1f, 0x3f, 0x3e, 0x3c, 0x38, 0x30, 0x20, 0x00 +RC_LFSR7: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03 +.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a +.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c +.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b +.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a +.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f +.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43 +.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09 +.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36 +.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37 +.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31 +.byte 0x63, 0x46, 
0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25 +.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c +.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a +.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f +.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00 +#elif defined(CRYPTO_AEAD) +RC_LFSR6: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x21, 0x03, 0x06 +.byte 0x0c, 0x18, 0x31, 0x22, 0x05, 0x0a, 0x14, 0x29 +.byte 0x13, 0x27, 0x0f, 0x1e, 0x3d, 0x3a, 0x34, 0x28 +.byte 0x11, 0x23, 0x07, 0x0e, 0x1c, 0x39, 0x32, 0x24 +.byte 0x09, 0x12, 0x25, 0x0b, 0x16, 0x2d, 0x1b, 0x37 +.byte 0x2e, 0x1d, 0x3b, 0x36, 0x2c, 0x19, 0x33, 0x26 +.byte 0x0d, 0x1a, 0x35, 0x2a, 0x15, 0x2b, 0x17, 0x2f +.byte 0x1f, 0x3f, 0x3e, 0x3c, 0x38, 0x30, 0x20, 0x00 +#else +RC_LFSR7: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03 +.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a +.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c +.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b +.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a +.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f +.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43 +.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09 +.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36 +.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37 +.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31 +.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25 +.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c +.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a +.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f +.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00 +#endif \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot256v2/avr8_speed/knot384.h b/knot/Implementations/crypto_hash/knot256v2/avr8_speed/knot384.h new file mode 100644 index 0000000..0b3dd75 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v2/avr8_speed/knot384.h @@ -0,0 +1,261 @@ +; +; ********************************************** +; * KNOT: a 
family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.1 2020 by KNOT Team * +; ********************************************** +; + +; an intentionally arrangement of registers to facilitate movw +#define x20 r0 +#define x21 r2 +#define x22 r4 +#define x23 r6 +#define x24 r8 +#define x25 r10 +#define x26 r1 +#define x27 r3 +#define x28 r5 +#define x29 r7 +#define x2a r9 +#define x2b r11 + +; an intentionally arrangement of registers to facilitate movw +#define x30 r22 +#define x35 r20 +#define x3a r18 +#define x33 r16 +#define x38 r14 +#define x31 r12 +#define x36 r23 +#define x3b r21 +#define x34 r19 +#define x39 r17 +#define x32 r15 +#define x37 r13 + +#define t0j r24 +#define t1j r25 +#define x0j r25 +#define x1j r27 + +#include "assist.h" + +.macro Sbox i0, i1, i2, i3 + ldi t0j, 0xFF + eor \i0, t0j + mov t0j, \i1 + and \i1, \i0 + eor \i1, \i2 + or \i2, t0j + eor \i0, \i3 + eor \i2, \i0 + eor t0j, \i3 + and \i0, \i1 + eor \i3, \i1 + eor \i0, t0j + and t0j, \i2 + eor \i1, t0j +.endm + +.macro OneColumn i0, i1, i2, i3 + ld \i0, Y + ldd \i1, Y + ROW_INBYTES + Sbox \i0, \i1, \i2, \i3 + st Y+, \i0 + rol \i1 ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES -1, \i1 +.endm + +Permutation: + PUSH_CONFLICT + mov rcnt, rn + + ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES) + ld x20, Y+ + ld x21, Y+ + ld x22, Y+ + ld x23, Y+ + ld x24, Y+ + ld x25, Y+ + ld x26, Y+ + ld x27, Y+ + ld x28, Y+ + ld x29, Y+ + ld x2a, Y+ + ld x2b, Y+ + ld x30, Y+ + ld x31, Y+ + ld x32, Y+ + ld x33, Y+ + ld x34, Y+ + ld x35, Y+ + ld x36, Y+ + ld x37, Y+ + ld x38, Y+ + ld x39, Y+ + ld x3a, Y+ + ld x3b, Y+ + + ldi ZL, lo8(RC_LFSR7) + ldi ZH, hi8(RC_LFSR7) + +round_loop_start: + ; AddRC + lpm t0j, Z+ + ldi YH, hi8(SRAM_STATE) + ldi YL, lo8(SRAM_STATE) + ld x0j, Y + eor x0j, t0j + + ldd x1j, Y + ROW_INBYTES + Sbox x0j, x1j, x20, x30 + st Y+, x0j + 
lsl x1j ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES -1, x1j + + OneColumn x0j, x1j, x21, x31 + OneColumn x0j, x1j, x22, x32 + OneColumn x0j, x1j, x23, x33 + OneColumn x0j, x1j, x24, x34 + OneColumn x0j, x1j, x25, x35 + OneColumn x0j, x1j, x26, x36 + OneColumn x0j, x1j, x27, x37 + OneColumn x0j, x1j, x28, x38 + OneColumn x0j, x1j, x29, x39 + OneColumn x0j, x1j, x2a, x3a + OneColumn x0j, x1j, x2b, x3b + + ld x1j, Y + eor t0j, t0j + adc x1j, t0j + st Y, x1j + + ; b a 9 8 7 6 5 4 3 2 1 0 + ; -- -- -- -- -- -- -- -- -- -- -- x- 0 + ; -- -- -- -- -- -- -- -- -- -- -- x' 0 + ; -- -- -- -- -- -- -- -- -- -- x- -- 1 + ; -- -- -- -- x' -- -- -- -- -- -- -- 7 + ; 4 3 2 1 0 b a 9 8 7 6 5 + + ; ShiftRows -- the last two rows + ; <<< 8 + ; b a 9 8 7 6 5 4 3 2 1 0 => a 9 8 7 6 5 4 3 2 1 0 b + movw t0j, x25 ; t1j:t0j <= x2b:x25 + movw x25, x24 ; x2b:x25 <= x2a:x24 + movw x24, x23 ; x2a:x24 <= x29:x23 + movw x23, x22 ; x29:x23 <= x28:x22 + movw x22, x21 ; x28:x22 <= x27:x21 + movw x21, x20 ; x27:x21 <= x26:x20 + mov x26, t0j ; x26 <= x25 + mov x20, t1j ; x20 <= x2b + + ; >>> 1 + mov t0j, x3b + ror t0j + ror x3a + ror x39 + ror x38 + ror x37 + ror x36 + ror x35 + ror x34 + ror x33 + ror x32 + ror x31 + ror x30 + ror x3b + ; <<< 56 + ; b a 9 8 7 6 5 4 3 2 1 0 => 4 3 2 1 0 b a 9 8 7 6 5 + ; mov x3j, x30 + ; mov x30, x35 + ; mov x35, x3a + ; mov x3a, x33 + ; mov x33, x38 + ; mov x38, x31 + ; mov x31, x36 + ; mov x36, x3b + ; mov x3b, x34 + ; mov x34, x39 + ; mov x39, x32 + ; mov x32, x37 + ; mov x37, x3j + ; an intentionally arrangement of registers to facilitate movw + ; x30 r22 + ; x35 r20 + ; x3a r18 + ; x33 r16 + ; x38 r14 + ; x31 r12 + ; x36 r23 + ; x3b r21 + ; x34 r19 + ; x39 r17 + ; x32 r15 + ; x37 r13 + movw t0j, x30 ; t1j:t0j <= x36:x30 + movw x30, x35 ; x36:x30 <= x3b:x35 + movw x35, x3a ; x3b:x35 <= x34:x3a + movw x3a, x33 ; x34:x3a <= x39:x33 + movw x33, x38 ; x39:x33 <= x32:x38 + movw x38, x31 ; x32:x38 <= x37:x31 + mov x31, t1j ; x31 <= x36 + mov x37, t0j ; x37 <= 
x30 + + dec rcnt + breq round_loop_end + jmp round_loop_start + +round_loop_end: + + ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES) + st Y+, x20 + st Y+, x21 + st Y+, x22 + st Y+, x23 + st Y+, x24 + st Y+, x25 + st Y+, x26 + st Y+, x27 + st Y+, x28 + st Y+, x29 + st Y+, x2a + st Y+, x2b + st Y+, x30 + st Y+, x31 + st Y+, x32 + st Y+, x33 + st Y+, x34 + st Y+, x35 + st Y+, x36 + st Y+, x37 + st Y+, x38 + st Y+, x39 + st Y+, x3a + st Y+, x3b + + POP_CONFLICT +ret + +RC_LFSR7: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03 +.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a +.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c +.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b +.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a +.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f +.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43 +.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09 +.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36 +.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37 +.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31 +.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25 +.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c +.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a +.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f +.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00 \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot256v2/avr8_speed/knot512.h b/knot/Implementations/crypto_hash/knot256v2/avr8_speed/knot512.h new file mode 100644 index 0000000..b0e4319 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v2/avr8_speed/knot512.h @@ -0,0 +1,435 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.1 2020 by KNOT Team * +; 
; Bit-sliced KNOT S-box applied to four row bytes in place:
; \i0..\i3 hold one byte of permutation rows 0..3 and are replaced
; by the S-box output. Clobbers t0j (scratch register).
; The first two instructions compute \i0 = ~\i0 (AVR `ldi`+`eor`
; with 0xFF); the remaining and/or/eor network is the standard
; bitsliced form of the 4-bit KNOT S-box.
.macro Sbox i0, i1, i2, i3
    ldi t0j, 0xFF
    eor \i0, t0j        ; \i0 = ~\i0
    mov t0j, \i1        ; save original row-1 byte
    and \i1, \i0
    eor \i1, \i2
    or \i2, t0j
    eor \i0, \i3
    eor \i2, \i0
    eor t0j, \i3
    and \i0, \i1
    eor \i3, \i1
    eor \i0, t0j
    and t0j, \i2
    eor \i1, t0j
.endm
hi8(RC_LFSR7) + rjmp round_loop_start +For_Hash: + ldi ZL, lo8(RC_LFSR8) + ldi ZH, hi8(RC_LFSR8) +#elif defined(CRYPTO_AEAD) + ldi ZL, lo8(RC_LFSR7) + ldi ZH, hi8(RC_LFSR7) +#else + ldi ZL, lo8(RC_LFSR8) + ldi ZH, hi8(RC_LFSR8) +#endif + + +round_loop_start: + ; AddRC + lpm t0j, Z+ + ldi YH, hi8(SRAM_STATE) + ldi YL, lo8(SRAM_STATE) + + ; column 0 + ld x0j, Y + eor x0j, t0j + ldd x1j, Y + ROW_INBYTES + Sbox x0j, x1j, x20, x30 + st Y+, x0j + lsl x1j ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES - 1, x1j + + ; column 1 + ld x0j, Y + ldd x1j, Y + ROW_INBYTES + ldd x2j, Y + 2 * ROW_INBYTES + Sbox x0j, x1j, x2j, x31 + st Y+, x0j + rol x1j ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES - 1, x1j + ldd t0j, Y + 2 * ROW_INBYTES + 1 + std Y + 2 * ROW_INBYTES + 1, x2j + mov x2j, t0j + + ; column 2, 3 + TwoColumns x22, x32, x33 + ; column 4, 5 + TwoColumns x24, x34, x35 + ; column 6, 7 + TwoColumns x26, x36, x37 + ; column 8, 9 + TwoColumns x28, x38, x39 + ; column 10, 11 + TwoColumns x2a, x3a, x3b + ; column 12, 13 + TwoColumns x2c, x3c, x3d + + ; column 14 + ld x0j, Y + ldd x1j, Y + ROW_INBYTES + Sbox x0j, x1j, x2e, x3e + st Y+, x0j + rol x1j ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES - 1, x1j + + ; column 15 + ld x0j, Y + ldd x1j, Y + ROW_INBYTES + Sbox x0j, x1j, x2j, x3f + st Y+, x0j + rol x1j ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES - 1, x1j + + ld x1j, Y + eor t0j, t0j + adc x1j, t0j + st Y, x1j + std Y + ROW_INBYTES + 1, x2j + + ; f e d c b a 9 8 7 6 5 4 3 2 1 0 + ; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x- 0 + ; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x' 0 + ; -- -- -- -- -- -- -- -- -- -- -- -- -- x- -- -- 2 + ; -- -- -- -- -- -- -- -- -- -- -- -- x' -- -- -- 3 + ; c b a 9 8 7 6 5 4 3 2 1 0 f e d + ; x2e x2c x2a x28 x26 x24 x22 x20 => x2c x2a x28 x26 x24 x22 x20 x2e + ;mov t0j, x2e + ;mov x2e, x2c + ;mov x2c, x2a + ;mov x2a, x28 + ;mov x28, x26 + ;mov x26, x24 + ;mov x24, x22 + ;mov x22, x20 + ;mov x20, t0j + ; an intentionally 
arrangement of registers to facilitate movw + movw t0j, x26 ; t1j:t0j <= x2e:x26 + movw x26, x24 ; x2e:x26 <= x2c:x24 + movw x24, x22 ; x2c:x24 <= x2a:x22 + movw x22, x20 ; x2a:x22 <= x28:x20 + mov x20, t1j ; x20 <= t1j + mov x28, t0j ; x28 <= t0j + + ; <<< 1 + mov t0j, x3f + rol t0j + rol x30 + rol x31 + rol x32 + rol x33 + rol x34 + rol x35 + rol x36 + rol x37 + rol x38 + rol x39 + rol x3a + rol x3b + rol x3c + rol x3d + rol x3e + rol x3f + ; <<< 24 + ; f e d c b a 9 8 7 6 5 4 3 2 1 0 => + ; c b a 9 8 7 6 5 4 3 2 1 0 f e d + ; mov x3j, x30 + ; mov x30, x3d + ; mov x3d, x3a + ; mov x3a, x37 + ; mov x37, x34 + ; mov x34, x31 + ; mov x31, x3e + ; mov x3e, x3b + ; mov x3b, x38 + ; mov x38, x35 + ; mov x35, x32 + ; mov x32, x3f + ; mov x3f, x3c + ; mov x3c, x39 + ; mov x39, x36 + ; mov x36, x33 + ; mov x33, x3j + ; an intentionally arrangement of registers to facilitate movw + ; x30 r8 + ; x3d r10 + ; x3a r12 + ; x37 r14 + ; x34 r16 + ; x31 r18 + ; x3e r20 + ; x3b r22 + ; x38 r9 + ; x35 r11 + ; x32 r13 + ; x3f r15 + ; x3c r17 + ; x39 r19 + ; x36 r21 + ; x33 r23 + movw t0j, x30 ; t1j:t0j <= x38:x30 + movw x30, x3d ; x38:x30 <= x35:x3d + movw x3d, x3a ; x35:x3d <= x32:x3a + movw x3a, x37 ; x32:x3a <= x3f:x37 + movw x37, x34 ; x3f:x37 <= x3c:x34 + movw x34, x31 ; x3c:x34 <= x39:x31 + movw x31, x3e ; x39:x31 <= x36:x3e + movw x3e, x3b ; x36:x3e <= x33:x3b + mov x3b, t1j ; x3b <= x38 + mov x33, t0j ; x33 <= x30 + + pop rcnt + dec rcnt + push rcnt + breq round_loop_end + rjmp round_loop_start + +round_loop_end: + pop rcnt + + ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES) + std Y + 0x00, x20 + std Y + 0x02, x22 + std Y + 0x04, x24 + std Y + 0x06, x26 + std Y + 0x08, x28 + std Y + 0x0a, x2a + std Y + 0x0c, x2c + std Y + 0x0e, x2e + adiw YL, ROW_INBYTES + st Y+, x30 + st Y+, x31 + st Y+, x32 + st Y+, x33 + st Y+, x34 + st Y+, x35 + st Y+, x36 + st Y+, x37 + st Y+, x38 + st Y+, x39 + st Y+, x3a + st Y+, x3b + st Y+, x3c + st Y+, x3d + st 
Y+, x3e + st Y+, x3f + + POP_CONFLICT +ret + +.section .text +#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) +RC_LFSR7: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03 +.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a +.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c +.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b +.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a +.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f +.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43 +.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09 +.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36 +.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37 +.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31 +.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25 +.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c +.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a +.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f +.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00 +RC_LFSR8: +.byte 0x01, 0x02, 0x04, 0x08, 0x11, 0x23, 0x47, 0x8e +.byte 0x1c, 0x38, 0x71, 0xe2, 0xc4, 0x89, 0x12, 0x25 +.byte 0x4b, 0x97, 0x2e, 0x5c, 0xb8, 0x70, 0xe0, 0xc0 +.byte 0x81, 0x03, 0x06, 0x0c, 0x19, 0x32, 0x64, 0xc9 +.byte 0x92, 0x24, 0x49, 0x93, 0x26, 0x4d, 0x9b, 0x37 +.byte 0x6e, 0xdc, 0xb9, 0x72, 0xe4, 0xc8, 0x90, 0x20 +.byte 0x41, 0x82, 0x05, 0x0a, 0x15, 0x2b, 0x56, 0xad +.byte 0x5b, 0xb6, 0x6d, 0xda, 0xb5, 0x6b, 0xd6, 0xac +.byte 0x59, 0xb2, 0x65, 0xcb, 0x96, 0x2c, 0x58, 0xb0 +.byte 0x61, 0xc3, 0x87, 0x0f, 0x1f, 0x3e, 0x7d, 0xfb +.byte 0xf6, 0xed, 0xdb, 0xb7, 0x6f, 0xde, 0xbd, 0x7a +.byte 0xf5, 0xeb, 0xd7, 0xae, 0x5d, 0xba, 0x74, 0xe8 +.byte 0xd1, 0xa2, 0x44, 0x88, 0x10, 0x21, 0x43, 0x86 +.byte 0x0d, 0x1b, 0x36, 0x6c, 0xd8, 0xb1, 0x63, 0xc7 +.byte 0x8f, 0x1e, 0x3c, 0x79, 0xf3, 0xe7, 0xce, 0x9c +.byte 0x39, 0x73, 0xe6, 0xcc, 0x98, 0x31, 0x62, 0xc5 +.byte 0x8b, 0x16, 0x2d, 0x5a, 0xb4, 0x69, 0xd2, 0xa4 +.byte 0x48, 0x91, 0x22, 0x45, 0x8a, 0x14, 0x29, 0x52 +.byte 0xa5, 0x4a, 0x95, 0x2a, 
0x54, 0xa9, 0x53, 0xa7 +.byte 0x4e, 0x9d, 0x3b, 0x77, 0xee, 0xdd, 0xbb, 0x76 +.byte 0xec, 0xd9, 0xb3, 0x67, 0xcf, 0x9e, 0x3d, 0x7b +.byte 0xf7, 0xef, 0xdf, 0xbf, 0x7e, 0xfd, 0xfa, 0xf4 +.byte 0xe9, 0xd3, 0xa6, 0x4c, 0x99, 0x33, 0x66, 0xcd +.byte 0x9a, 0x35, 0x6a, 0xd4, 0xa8, 0x51, 0xa3, 0x46 +.byte 0x8c, 0x18, 0x30, 0x60, 0xc1, 0x83, 0x07, 0x0e +.byte 0x1d, 0x3a, 0x75, 0xea, 0xd5, 0xaa, 0x55, 0xab +.byte 0x57, 0xaf, 0x5f, 0xbe, 0x7c, 0xf9, 0xf2, 0xe5 +.byte 0xca, 0x94, 0x28, 0x50, 0xa1, 0x42, 0x84, 0x09 +.byte 0x13, 0x27, 0x4f, 0x9f, 0x3f, 0x7f, 0xff, 0xfe +.byte 0xfc, 0xf8, 0xf0, 0xe1, 0xc2, 0x85, 0x0b, 0x17 +.byte 0x2f, 0x5e, 0xbc, 0x78, 0xf1, 0xe3, 0xc6, 0x8d +.byte 0x1a, 0x34, 0x68, 0xd0, 0xa0, 0x40, 0x80, 0x00 +#elif defined(CRYPTO_AEAD) +RC_LFSR7: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03 +.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a +.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c +.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b +.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a +.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f +.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43 +.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09 +.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36 +.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37 +.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31 +.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25 +.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c +.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a +.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f +.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00 +#else +RC_LFSR8: +.byte 0x01, 0x02, 0x04, 0x08, 0x11, 0x23, 0x47, 0x8e +.byte 0x1c, 0x38, 0x71, 0xe2, 0xc4, 0x89, 0x12, 0x25 +.byte 0x4b, 0x97, 0x2e, 0x5c, 0xb8, 0x70, 0xe0, 0xc0 +.byte 0x81, 0x03, 0x06, 0x0c, 0x19, 0x32, 0x64, 0xc9 +.byte 0x92, 0x24, 0x49, 0x93, 0x26, 0x4d, 0x9b, 0x37 +.byte 0x6e, 0xdc, 0xb9, 0x72, 0xe4, 0xc8, 0x90, 0x20 +.byte 0x41, 0x82, 0x05, 
0x0a, 0x15, 0x2b, 0x56, 0xad +.byte 0x5b, 0xb6, 0x6d, 0xda, 0xb5, 0x6b, 0xd6, 0xac +.byte 0x59, 0xb2, 0x65, 0xcb, 0x96, 0x2c, 0x58, 0xb0 +.byte 0x61, 0xc3, 0x87, 0x0f, 0x1f, 0x3e, 0x7d, 0xfb +.byte 0xf6, 0xed, 0xdb, 0xb7, 0x6f, 0xde, 0xbd, 0x7a +.byte 0xf5, 0xeb, 0xd7, 0xae, 0x5d, 0xba, 0x74, 0xe8 +.byte 0xd1, 0xa2, 0x44, 0x88, 0x10, 0x21, 0x43, 0x86 +.byte 0x0d, 0x1b, 0x36, 0x6c, 0xd8, 0xb1, 0x63, 0xc7 +.byte 0x8f, 0x1e, 0x3c, 0x79, 0xf3, 0xe7, 0xce, 0x9c +.byte 0x39, 0x73, 0xe6, 0xcc, 0x98, 0x31, 0x62, 0xc5 +.byte 0x8b, 0x16, 0x2d, 0x5a, 0xb4, 0x69, 0xd2, 0xa4 +.byte 0x48, 0x91, 0x22, 0x45, 0x8a, 0x14, 0x29, 0x52 +.byte 0xa5, 0x4a, 0x95, 0x2a, 0x54, 0xa9, 0x53, 0xa7 +.byte 0x4e, 0x9d, 0x3b, 0x77, 0xee, 0xdd, 0xbb, 0x76 +.byte 0xec, 0xd9, 0xb3, 0x67, 0xcf, 0x9e, 0x3d, 0x7b +.byte 0xf7, 0xef, 0xdf, 0xbf, 0x7e, 0xfd, 0xfa, 0xf4 +.byte 0xe9, 0xd3, 0xa6, 0x4c, 0x99, 0x33, 0x66, 0xcd +.byte 0x9a, 0x35, 0x6a, 0xd4, 0xa8, 0x51, 0xa3, 0x46 +.byte 0x8c, 0x18, 0x30, 0x60, 0xc1, 0x83, 0x07, 0x0e +.byte 0x1d, 0x3a, 0x75, 0xea, 0xd5, 0xaa, 0x55, 0xab +.byte 0x57, 0xaf, 0x5f, 0xbe, 0x7c, 0xf9, 0xf2, 0xe5 +.byte 0xca, 0x94, 0x28, 0x50, 0xa1, 0x42, 0x84, 0x09 +.byte 0x13, 0x27, 0x4f, 0x9f, 0x3f, 0x7f, 0xff, 0xfe +.byte 0xfc, 0xf8, 0xf0, 0xe1, 0xc2, 0x85, 0x0b, 0x17 +.byte 0x2f, 0x5e, 0xbc, 0x78, 0xf1, 0xe3, 0xc6, 0x8d +.byte 0x1a, 0x34, 0x68, 0xd0, 0xa0, 0x40, 0x80, 0x00 +#endif \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot256v2/avr8_speed/permutation.h b/knot/Implementations/crypto_hash/knot256v2/avr8_speed/permutation.h new file mode 100644 index 0000000..e6c9793 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v2/avr8_speed/permutation.h @@ -0,0 +1,45 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.1 2020 by KNOT Team * +; 
********************************************** +; + +; +; ============================================ +; R E G I S T E R D E F I N I T I O N S +; ============================================ +; + +#define mclen r16 +#define radlen r17 +#define tcnt r17 +#define tmp0 r20 +#define tmp1 r21 +#define cnt0 r22 +#define rn r23 +#define rate r24 + + +; AEDH = 0b000: for authenticate AD +; AEDH = 0b001: for encryption +; AEDH = 0b011: for decryption +; AEDH = 0b100: for hash +#define AEDH r25 +#define rcnt r26 + +#if (STATE_INBITS==256) +#include "knot256.h" +#elif (STATE_INBITS==384) +#include "knot384.h" +#elif (STATE_INBITS==512) +#include "knot512.h" +#else +#error "Not specified key size and state size" +#endif + + diff --git a/knot/Implementations/crypto_hash/knot384/avr8_speed/api.h b/knot/Implementations/crypto_hash/knot384/avr8_speed/api.h new file mode 100644 index 0000000..1656d0c --- /dev/null +++ b/knot/Implementations/crypto_hash/knot384/avr8_speed/api.h @@ -0,0 +1 @@ +#define CRYPTO_BYTES 48 \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot384/avr8_speed/assist.h b/knot/Implementations/crypto_hash/knot384/avr8_speed/assist.h new file mode 100644 index 0000000..f95a717 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot384/avr8_speed/assist.h @@ -0,0 +1,86 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.1 2020 by KNOT Team * +; ********************************************** +; +.macro PUSH_CONFLICT + push r16 + push r17 + push r18 + push r19 + + push r23 + push r24 + push r25 + push r26 + push r27 + push r28 + push r29 + push r30 + push r31 +.endm + +.macro POP_CONFLICT + pop r31 + pop r30 + pop r29 + pop r28 + pop r27 + pop r26 + pop r25 + pop r24 + pop r23 + + pop r19 + pop r18 + pop r17 + pop r16 +.endm + +.macro PUSH_ALL + push 
r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r28 + push r29 +.endm + +.macro POP_ALL + pop r29 + pop r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + clr r1 +.endm \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot384/avr8_speed/config.h b/knot/Implementations/crypto_hash/knot384/avr8_speed/config.h new file mode 100644 index 0000000..c9f6bf2 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot384/avr8_speed/config.h @@ -0,0 +1,131 @@ +#ifndef __CONFIG_H__ +#define __CONFIG_H__ + +//#define CRYPTO_AEAD +#define CRYPTO_HASH + +#define MAX_MESSAGE_LENGTH 128 + +#define STATE_INBITS 384 +/* For CRYPTO_AEAD */ +#define CRYPTO_KEYBITS 192 +/* For CRYPTO_HASH */ +#define CRYPTO_BITS 384 + +#define STATE_INBYTES ((STATE_INBITS + 7) / 8) +#define ROW_INBITS ((STATE_INBITS + 3) / 4) +#define ROW_INBYTES ((ROW_INBITS + 7) / 8) + +/* For CRYPTO_AEAD */ +#define CRYPTO_KEYBYTES ((CRYPTO_KEYBITS + 7) / 8) +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES CRYPTO_KEYBYTES +#define CRYPTO_ABYTES CRYPTO_KEYBYTES +#define CRYPTO_NOOVERLAP 1 + +#define MAX_ASSOCIATED_DATA_LENGTH 32 +#define MAX_CIPHER_LENGTH (MAX_MESSAGE_LENGTH + CRYPTO_ABYTES) + +#define TAG_MATCH 0 +#define TAG_UNMATCH -1 +#define OTHER_FAILURES -2 + +/* For CRYPTO_HASH */ +#define CRYPTO_BYTES ((CRYPTO_BITS + 7) / 8) + + + +#define DOMAIN_BITS 0x80 +#define PAD_BITS 0x01 +#define S384_R192_BITS 0x80 + +#if (STATE_INBITS==256) +#define C1 1 +#define C2 8 +#define C3 25 +#elif (STATE_INBITS==384) +#define C1 1 +#define C2 8 +#define C3 55 +#elif (STATE_INBITS==512) +#define C1 1 +#define C2 16 +#define C3 25 +#else +#error "Not specified state size" +#endif + +#ifdef CRYPTO_AEAD +/* For CRYPTO_AEAD */ +#define KEY_INBITS (CRYPTO_KEYBYTES * 8) +#define 
KEY_INBYTES (CRYPTO_KEYBYTES) + +#define NONCE_INBITS (CRYPTO_NPUBBYTES * 8) +#define NONCE_INBYTES (CRYPTO_NPUBBYTES) + +#define TAG_INBITS (CRYPTO_ABYTES * 8) +#define TAG_INBYTES (CRYPTO_ABYTES) + +#if (KEY_INBITS==128) && (STATE_INBITS==256) +#define RATE_INBITS 64 +#define NR_0 52 +#define NR_i 28 +#define NR_f 32 +#elif (KEY_INBITS==128) && (STATE_INBITS==384) +#define RATE_INBITS 192 +#define NR_0 76 +#define NR_i 28 +#define NR_f 32 +#elif (KEY_INBITS==192) && (STATE_INBITS==384) +#define RATE_INBITS 96 +#define NR_0 76 +#define NR_i 40 +#define NR_f 44 +#elif (KEY_INBITS==256) && (STATE_INBITS==512) +#define RATE_INBITS 128 +#define NR_0 100 +#define NR_i 52 +#define NR_f 56 +#else +#error "Not specified key size and state size" +#endif + +#define RATE_INBYTES ((RATE_INBITS + 7) / 8) +#define SQUEEZE_RATE_INBYTES TAG_INBYTES + +#endif + +#ifdef CRYPTO_HASH +/* For CRYPTO_HASH */ +#define HASH_DIGEST_INBITS (CRYPTO_BYTES * 8) + +#if (HASH_DIGEST_INBITS==256) && (STATE_INBITS==256) +#define HASH_RATE_INBITS 32 +#define HASH_SQUEEZE_RATE_INBITS 128 +#define NR_h 68 +#elif (HASH_DIGEST_INBITS==256) && (STATE_INBITS==384) +#define HASH_RATE_INBITS 128 +#define HASH_SQUEEZE_RATE_INBITS 128 +#define NR_h 80 +#elif (HASH_DIGEST_INBITS==384) && (STATE_INBITS==384) +#define HASH_RATE_INBITS 48 +#define HASH_SQUEEZE_RATE_INBITS 192 +#define NR_h 104 +#elif (HASH_DIGEST_INBITS==512) && (STATE_INBITS==512) +#define HASH_RATE_INBITS 64 +#define HASH_SQUEEZE_RATE_INBITS 256 +#define NR_h 140 +#else +#error "Not specified hash digest size and state size" +#endif + +#define HASH_RATE_INBYTES ((HASH_RATE_INBITS + 7) / 8) +#define HASH_SQUEEZE_RATE_INBYTES ((HASH_SQUEEZE_RATE_INBITS + 7) / 8) + +#endif + +#define TAG_MATCH 0 +#define TAG_UNMATCH -1 +#define OTHER_FAILURES -2 + +#endif \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot384/avr8_speed/crypto_hash.h b/knot/Implementations/crypto_hash/knot384/avr8_speed/crypto_hash.h new file mode 
100644 index 0000000..342a639 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot384/avr8_speed/crypto_hash.h @@ -0,0 +1,13 @@ +#ifdef __cplusplus +extern "C" { +#endif + +int crypto_hash( + unsigned char *out, + const unsigned char *in, + unsigned long long inlen + ); + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot384/avr8_speed/encrypt.c b/knot/Implementations/crypto_hash/knot384/avr8_speed/encrypt.c new file mode 100644 index 0000000..baf0a3b --- /dev/null +++ b/knot/Implementations/crypto_hash/knot384/avr8_speed/encrypt.c @@ -0,0 +1,106 @@ +#include +#include +#include +#include +#include "config.h" + +extern void crypto_aead_encrypt_asm( + unsigned char *c, + const unsigned char *m, + unsigned char mlen, + const unsigned char *ad, + unsigned char adlen, + const unsigned char *npub, + const unsigned char *k + ); + +extern int crypto_aead_decrypt_asm( + unsigned char *m, + const unsigned char *c, + unsigned char clen, + const unsigned char *ad, + unsigned char adlen, + const unsigned char *npub, + const unsigned char *k + ); + +extern void crypto_hash_asm( + unsigned char *out, + const unsigned char *in, + unsigned char inlen + ); + + +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k + ) +{ + /* + ... + ... the code for the cipher implementation goes here, + ... generating a ciphertext c[0],c[1],...,c[*clen-1] + ... from a plaintext m[0],m[1],...,m[mlen-1] + ... and associated data ad[0],ad[1],...,ad[adlen-1] + ... and nonce npub[0],npub[1],.. + ... and secret key k[0],k[1],... + ... the implementation shall not use nsec + ... + ... 
return 0; + */ + + (void)nsec; + + crypto_aead_encrypt_asm(c, m, mlen, ad, adlen, npub, k); + + *clen = mlen + TAG_INBYTES; + return 0; +} + + + +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k + ) +{ + /* + ... + ... the code for the AEAD implementation goes here, + ... generating a plaintext m[0],m[1],...,m[*mlen-1] + ... and secret message number nsec[0],nsec[1],... + ... from a ciphertext c[0],c[1],...,c[clen-1] + ... and associated data ad[0],ad[1],...,ad[adlen-1] + ... and nonce number npub[0],npub[1],... + ... and secret key k[0],k[1],... + ... + ... return 0; + */ + unsigned long long mlen_; + unsigned char tag_is_match; + + (void)nsec; + if (clen < CRYPTO_ABYTES) { + return -1; + } + mlen_ = clen - CRYPTO_ABYTES; + + tag_is_match = crypto_aead_decrypt_asm(m, c, mlen_, ad, adlen, npub, k); + + if (tag_is_match != 0) + { + memset(m, 0, (size_t)mlen_); + return -1; + } + + *mlen = mlen_; + return 0; +} \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot384/avr8_speed/encrypt_core.S b/knot/Implementations/crypto_hash/knot384/avr8_speed/encrypt_core.S new file mode 100644 index 0000000..bd74f93 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot384/avr8_speed/encrypt_core.S @@ -0,0 +1,555 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.1 2020 by KNOT Team * +; ********************************************** +; + +; +; ============================================ +; S R A M D E F I N I T I O N S +; ============================================ +; +#include +#include "config.h" + +.section .noinit + SRAM_STATE: .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + 
.BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#if (STATE_INBYTES > 32) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif +#if (STATE_INBYTES > 48) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif + SRAM_MESSAGE_OUT_ADDR: .BYTE 0, 0 + SRAM_MESSAGE_IN_ADDR: .BYTE 0, 0 + SRAM_MESSAGE_IN_LEN: .BYTE 0, 0 +#ifdef CRYPTO_AEAD +; For CRYPTO_AEAD + SRAM_ASSOCIATED_DATA_ADDR: .BYTE 0, 0 + SRAM_ADLEN: .BYTE 0, 0 + SRAM_NONCE_ADDR: .BYTE 0, 0 + SRAM_KEY_ADDR: .BYTE 0, 0 + + SRAM_ADDITIONAL: + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#if (CRYPTO_ABYTES > 16) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif +#if (CRYPTO_ABYTES > 24) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif + +#endif + +.section .text + +#include "permutation.h" + +; require YH:YL be the address of the current associated data/cipher/message block +; for enc and dec, store ciphertext or plaintext +; require ZH:ZL be the address of the current cipher/message block +.macro XOR_to_State_ENCDEC + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + mov cnt0, rate +XOR_to_State_loop_ENCDEC: + ld tmp0, Y+ ; plaintext/ciphertext + ld tmp1, X ; state + eor tmp1, tmp0 ; ciphertext/plaintext + st Z+, tmp1 ; store ciphertext/plaintext + sbrc AEDH, 1 ; test auth/enc or dec, if AEDH[1] == 0, skip repalce state byte + mov tmp1, tmp0 ; if dec, replace state + st X+, tmp1 ; store state byte + dec cnt0 + brne XOR_to_State_loop_ENCDEC +; YH:YL are now the address of the next associated data block +.endm + +; require YH:YL be the address of the current associated data/cipher/message block +; for enc and dec, store ciphertext or plaintext +; require ZH:ZL be the address of the current cipher/message block +.macro XOR_to_State_AUTH + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + mov cnt0, rate +XOR_to_State_loop_AUTH: + ld tmp0, Y+ ; plaintext/ciphertext + ld tmp1, X ; state + eor tmp1, tmp0 ; ciphertext/plaintext + st X+, tmp1 ; store 
; require YH:YL pointed to the input data
; require ZH:ZL pointed to the output data
; require cnt0 contains the number of bytes in source data
; require number of bytes in source data less than rate, i.e., 0 <= cnt0 < rate
;
; the 0th bit in AEDH is used to distinguish (auth AD) or (enc/dec M/C):
; AEDH[0] = 0 for (auth AD), AEDH[0] = 1 for (enc/dec M/C)
; the 1th bit in AEDH is used to distinguish (auth AD/enc M) or (dec C):
; AEDH[1] = 0 for (auth AD/enc M), AEDH[1] = 1 for (dec C)
; AEDH = 0b000 for (auth AD)
; AEDH = 0b001 for (enc M)
; AEDH = 0b011 for (dec C)
;
; XORs a final partial block into the state, then applies the 10*
; padding bit (PAD_BITS) to the byte that follows the data.
Pad_XOR_to_State:
    ldi XH, hi8(SRAM_STATE)
    ldi XL, lo8(SRAM_STATE)
    tst cnt0
    breq XOR_padded_data    ; empty block: only the padding bit is added
XOR_source_data_loop:
    ld tmp0, Y+             ; plaintext/ciphertext
    ld tmp1, X              ; state
    eor tmp1, tmp0          ; ciphertext/plaintext
    sbrc AEDH, 0            ; test auth or enc/dec, if AEDH[0] == 0, skip store result
    st Z+, tmp1             ; store ciphertext/plaintext
    sbrc AEDH, 1            ; test auth/enc or dec, if AEDH[1] == 0, skip replace state byte
    mov tmp1, tmp0          ; if dec, replace state with ciphertext byte
    st X+, tmp1             ; store state byte
    dec cnt0
    brne XOR_source_data_loop
XOR_padded_data:
    ldi tmp0, PAD_BITS
    ld tmp1, X
    eor tmp1, tmp0
    st X, tmp1
ret

; XORs the domain-separation constant (DOMAIN_BITS) into the last
; byte of the state, performed between AD processing and M/C processing.
AddDomain:
    ldi XH, hi8(SRAM_STATE + STATE_INBYTES - 1)
    ldi XL, lo8(SRAM_STATE + STATE_INBYTES - 1)
    ldi tmp0, DOMAIN_BITS
    ld tmp1, X
    eor tmp0, tmp1
    st X, tmp0
ret

; Copies `rate` bytes from the front of the state to Z (squeeze step).
; require ZH:ZL be the address of the destination
EXTRACT_from_State:
    ldi XH, hi8(SRAM_STATE)
    ldi XL, lo8(SRAM_STATE)
    mov tmp1, rate
EXTRACT_from_State_loop:
    ld tmp0, X+
    st Z+, tmp0
    dec tmp1
    brne EXTRACT_from_State_loop
ret
+auth_ad_padded_block: + mov cnt0, radlen + rcall Pad_XOR_to_State + rcall Permutation + +AUTH_end: +ret + +#ifdef CRYPTO_AEAD +Initialization: + ldi rn, NR_0 + ldi XL, lo8(SRAM_STATE) + ldi XH, hi8(SRAM_STATE) + + lds YH, SRAM_NONCE_ADDR + lds YL, SRAM_NONCE_ADDR + 1 + ldi cnt0, CRYPTO_NPUBBYTES +load_nonce_loop: + ld tmp0, Y+ + st X+, tmp0 + dec cnt0 + brne load_nonce_loop + + lds YH, SRAM_KEY_ADDR + lds YL, SRAM_KEY_ADDR + 1 + ldi cnt0, CRYPTO_KEYBYTES +load_key_loop: + ld tmp0, Y+ + st X+, tmp0 + dec cnt0 + brne load_key_loop + +#if (STATE_INBITS==384) && (RATE_INBITS==192) + ldi cnt0, (STATE_INBYTES - CRYPTO_NPUBBYTES - CRYPTO_KEYBYTES - 1) + clr tmp0 +empty_state_loop: + st X+, tmp0 + dec cnt0 + brne empty_state_loop + ldi tmp0, S384_R192_BITS + st X+, tmp0 +#endif + + rcall Permutation +ret + +ENC: + tst mclen + breq ENC_end + + cp mclen, rate + brlo enc_padded_block + +enc_loop: + XOR_to_State_ENCDEC + ldi rn, NR_i + rcall Permutation + sub mclen, rate + cp mclen, rate + brlo enc_padded_block + rjmp enc_loop + +enc_padded_block: + mov cnt0, mclen + rcall Pad_XOR_to_State +ENC_end: +ret + +Finalization: + ldi rate, SQUEEZE_RATE_INBYTES + ldi rn, NR_f + rcall Permutation + rcall EXTRACT_from_State +ret + +; void crypto_aead_encrypt_asm( +; unsigned char *c, +; const unsigned char *m, +; unsigned long long mlen, +; const unsigned char *ad, +; unsigned long long adlen, +; const unsigned char *npub, +; const unsigned char *k +; ) +; +; unsigned char *c, is passed in r24:r25 +; const unsigned char *m, is passed in r22:r23 +; unsigned long long mlen, is passed in r20:r21, only LSB (r20) is used +; const unsigned char *ad, is passed in r18:r19 +; unsigned long long adlen, is passed in r16:r17, only LSB (r16) is used +; const unsigned char *npub, is passed in r14:r15 +; const unsigned char *k is passed in r12:r13 +.global crypto_aead_encrypt_asm +crypto_aead_encrypt_asm: + PUSH_ALL + ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR) + ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR) + st X+, 
; int crypto_aead_decrypt_asm(
;     unsigned char *m,
;     const unsigned char *c,
;     unsigned long long clen,
;     const unsigned char *ad,
;     unsigned long long adlen,
;     const unsigned char *npub,
;     const unsigned char *k
; )
;
; unsigned char *m,          is passed in r24:r25
; const unsigned char *c,    is passed in r22:r23
; unsigned long long clen,   is passed in r20:r21, only LSB (r20) is used
; const unsigned char *ad,   is passed in r18:r19
; unsigned long long adlen,  is passed in r16:r17, only LSB (r16) is used
; const unsigned char *npub, is passed in r14:r15
; const unsigned char *k     is passed in r12:r13
;
; Returns 0 in r25:r24 when the recomputed tag matches the received
; one, 0xFFFF (-1) otherwise.
;
; FIX: the tag comparison is now constant-time.  The original code
; branched out of the loop on the first mismatching byte, leaking the
; length of the matching tag prefix through timing — a classic oracle
; against AEAD tag checks.  We instead OR all byte differences into a
; scratch register and test once after the loop.  `rn` (r23) is free
; here: it is call-clobbered and unused after the final Permutation.
.global crypto_aead_decrypt_asm
crypto_aead_decrypt_asm:
    PUSH_ALL
    ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
    ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
    st X+, r25 ;store message address in SRAM_MESSAGE_OUT_ADDR
    st X+, r24
    st X+, r23 ;store cipher address in SRAM_MESSAGE_IN_ADDR
    st X+, r22
    st X+, r21 ;store cipher length in SRAM_MESSAGE_IN_LEN
    st X+, r20
    st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR
    st X+, r18
    st X+, r17 ;store associated data length in SRAM_ADLEN
    st X+, r16
    st X+, r15 ;store nonce address in SRAM_NONCE_ADDR
    st X+, r14
    st X+, r13 ;store key address in SRAM_KEY_ADDR
    st X+, r12
    mov radlen, r16
    mov mclen, r20

    rcall Initialization

    ldi rn, NR_i
    ldi rate, RATE_INBYTES
    ldi AEDH, 0b000 ; AEDH = 0b000 for (auth AD)
    lds YH, SRAM_ASSOCIATED_DATA_ADDR
    lds YL, SRAM_ASSOCIATED_DATA_ADDR + 1
    rcall AUTH
    rcall AddDomain
    ldi AEDH, 0b011 ; AEDH = 0b011 for (dec C)
    lds YH, SRAM_MESSAGE_IN_ADDR
    lds YL, SRAM_MESSAGE_IN_ADDR + 1
    lds ZH, SRAM_MESSAGE_OUT_ADDR
    lds ZL, SRAM_MESSAGE_OUT_ADDR + 1
    rcall ENC

    ; squeeze the recomputed tag into scratch SRAM
    ldi ZH, hi8(SRAM_ADDITIONAL)
    ldi ZL, lo8(SRAM_ADDITIONAL)
    rcall Finalization

    ; constant-time tag comparison:
    ; Z -> recomputed tag, Y -> received tag (follows the ciphertext)
    sbiw ZL, CRYPTO_ABYTES
    ldi cnt0, CRYPTO_ABYTES
    clr rn              ; rn accumulates the OR of all byte differences
compare_tag:
    ld tmp0, Z+
    ld tmp1, Y+
    eor tmp0, tmp1      ; 0 iff the bytes are equal
    or rn, tmp0
    dec cnt0
    brne compare_tag
    tst rn
    brne return_tag_not_match

return_tag_match:
    clr r25
    clr r24
    rjmp crypto_aead_decrypt_end

return_tag_not_match:
    ldi r25, 0xFF
    ldi r24, 0xFF

crypto_aead_decrypt_end:
    POP_ALL
ret
SRAM_MESSAGE_IN_LEN + st X+, r20 + mov mclen, r20 + + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) +#if (STATE_INBITS==384) && (HASH_RATE_INBITS==128) + ldi cnt0, STATE_INBYTES - 1 +#else + ldi cnt0, STATE_INBYTES +#endif + clr tmp0 +zero_state: + st X+, tmp0 + dec cnt0 + brne zero_state + +#if (STATE_INBITS==384) && (HASH_RATE_INBITS==128) + ldi tmp0, S384_R192_BITS + st X+, tmp0 +#endif + + ldi rn, NR_h + ldi AEDH, 0b100 + +HASH_ABSORBING: + mov radlen, mclen + tst radlen + breq EMPTY_M + ldi rate, HASH_RATE_INBYTES + lds YH, SRAM_MESSAGE_IN_ADDR + lds YL, SRAM_MESSAGE_IN_ADDR + 1 + rcall AUTH + rjmp HASH_SQUEEZING + +EMPTY_M: + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + ldi tmp0, PAD_BITS + ld tmp1, X + eor tmp1, tmp0 + st X, tmp1 + rcall Permutation + +HASH_SQUEEZING: + ldi rate, HASH_SQUEEZE_RATE_INBYTES + lds ZH, SRAM_MESSAGE_OUT_ADDR + lds ZL, SRAM_MESSAGE_OUT_ADDR + 1 + ldi tcnt, CRYPTO_BYTES +SQUEEZING_loop: + rcall EXTRACT_from_State + subi tcnt, HASH_SQUEEZE_RATE_INBYTES + breq HASH_SQUEEZING_end + rcall Permutation + rjmp SQUEEZING_loop +HASH_SQUEEZING_end: + POP_ALL +ret + +#endif + + +; Byte Order In AVR 8: +; KNOT-AEAD(128, 256, 64): +; N[ 0] AEAD_State[ 0] | Message[ 0] Perm_row_0[0] 0 Tag[ 0] +; N[ 1] AEAD_State[ 1] | Message[ 1] Perm_row_0[1] 0 Tag[ 1] +; N[ 2] AEAD_State[ 2] | Message[ 2] Perm_row_0[2] 0 Tag[ 2] +; N[ 3] AEAD_State[ 3] | Message[ 3] Perm_row_0[3] 0 Tag[ 3] +; N[ 4] AEAD_State[ 4] | Message[ 4] 0x01 Perm_row_0[4] 0 Tag[ 4] +; N[ 5] AEAD_State[ 5] | Message[ 5] 0x00 Perm_row_0[5] 0 Tag[ 5] +; N[ 6] AEAD_State[ 6] | Message[ 6] 0x00 Perm_row_0[6] 0 Tag[ 6] +; N[ 7] AEAD_State[ 7] | Message[ 7] 0x00 Perm_row_0[7] <<< 0 Tag[ 7] +; N[ 8] AEAD_State[ 8] | Perm_row_1[0] 1 +; N[ 9] AEAD_State[ 9] | Perm_row_1[1] 1 +; N[10] AEAD_State[10] | Perm_row_1[2] 1 +; N[11] AEAD_State[11] | Perm_row_1[3] 1 +; N[12] AEAD_State[12] | Perm_row_1[4] 1 +; N[13] AEAD_State[13] | Perm_row_1[5] 1 +; N[14] AEAD_State[14] | Perm_row_1[6] 1 +; 
N[15] AEAD_State[15] | Perm_row_1[7] <<< 1 +; K[ 0] AEAD_State[16] | Perm_row_2[0] 8 +; K[ 1] AEAD_State[17] | Perm_row_2[1] 8 +; K[ 2] AEAD_State[18] | Perm_row_2[2] 8 +; K[ 3] AEAD_State[19] | Perm_row_2[3] 8 +; K[ 4] AEAD_State[20] | Perm_row_2[4] 8 +; K[ 5] AEAD_State[21] | Perm_row_2[5] 8 +; K[ 6] AEAD_State[22] | Perm_row_2[6] 8 +; K[ 7] AEAD_State[23] | Perm_row_2[7] <<< 8 +; K[ 8] AEAD_State[24] | Perm_row_3[0] 25 +; K[ 9] AEAD_State[25] | Perm_row_3[1] 25 +; K[10] AEAD_State[26] | Perm_row_3[2] 25 +; K[11] AEAD_State[27] | Perm_row_3[3] 25 +; K[12] AEAD_State[28] | Perm_row_3[4] 25 +; K[13] AEAD_State[29] | Perm_row_3[5] 25 +; K[14] AEAD_State[30] | Perm_row_3[6] 25 +; K[15] AEAD_State[31] | ^0x80 Perm_row_3[7] <<< 25 +; +; +; KNOT-AEAD(128, 384, 192): +; Initialization +; N[ 0] AEAD_State[ 0] | Message[ 0] Perm_row_0[ 0] 0 Tag[ 0] +; N[ 1] AEAD_State[ 1] | Message[ 1] Perm_row_0[ 1] 0 Tag[ 1] +; N[ 2] AEAD_State[ 2] | Message[ 2] Perm_row_0[ 2] 0 Tag[ 2] +; N[ 3] AEAD_State[ 3] | Message[ 3] Perm_row_0[ 3] 0 Tag[ 3] +; N[ 4] AEAD_State[ 4] | Message[ 4] 0x01 Perm_row_0[ 4] 0 Tag[ 4] +; N[ 5] AEAD_State[ 5] | Message[ 5] 0x00 Perm_row_0[ 5] 0 Tag[ 5] +; N[ 6] AEAD_State[ 6] | Message[ 6] 0x00 Perm_row_0[ 6] 0 Tag[ 6] +; N[ 7] AEAD_State[ 7] | Message[ 7] 0x00 Perm_row_0[ 7] 0 Tag[ 7] +; N[ 8] AEAD_State[ 8] | Message[ 8] 0x00 Perm_row_0[ 8] 0 Tag[ 8] +; N[ 9] AEAD_State[ 9] | Message[ 9] 0x00 Perm_row_0[ 9] 0 Tag[ 9] +; N[10] AEAD_State[10] | Message[10] 0x00 Perm_row_0[10] 0 Tag[10] +; N[11] AEAD_State[11] | Message[11] 0x00 Perm_row_0[11] <<< 0 Tag[11] +; N[12] AEAD_State[12] | Message[12] 0x00 Perm_row_1[ 0] 1 Tag[12] +; N[13] AEAD_State[13] | Message[13] 0x00 Perm_row_1[ 1] 1 Tag[13] +; N[14] AEAD_State[14] | Message[14] 0x00 Perm_row_1[ 2] 1 Tag[14] +; N[15] AEAD_State[15] | Message[15] 0x00 Perm_row_1[ 3] 1 Tag[15] +; K[ 0] AEAD_State[16] | Message[16] 0x00 Perm_row_1[ 4] 1 +; K[ 1] AEAD_State[17] | Message[17] 0x00 Perm_row_1[ 5] 1 +; K[ 2]
AEAD_State[18] | Message[18] 0x00 Perm_row_1[ 6] 1 +; K[ 3] AEAD_State[19] | Message[19] 0x00 Perm_row_1[ 7] 1 +; K[ 4] AEAD_State[20] | Message[20] 0x00 Perm_row_1[ 8] 1 +; K[ 5] AEAD_State[21] | Message[21] 0x00 Perm_row_1[ 9] 1 +; K[ 6] AEAD_State[22] | Message[22] 0x00 Perm_row_1[10] 1 +; K[ 7] AEAD_State[23] | Message[23] 0x00 Perm_row_1[11] <<< 1 +; K[ 8] AEAD_State[24] | Perm_row_2[ 0] 8 +; K[ 9] AEAD_State[25] | Perm_row_2[ 1] 8 +; K[10] AEAD_State[26] | Perm_row_2[ 2] 8 +; K[11] AEAD_State[27] | Perm_row_2[ 3] 8 +; K[12] AEAD_State[28] | Perm_row_2[ 4] 8 +; K[13] AEAD_State[29] | Perm_row_2[ 5] 8 +; K[14] AEAD_State[30] | Perm_row_2[ 6] 8 +; K[15] AEAD_State[31] | Perm_row_2[ 7] 8 +; 0x00 AEAD_State[32] | Perm_row_2[ 8] 8 +; 0x00 AEAD_State[33] | Perm_row_2[ 9] 8 +; 0x00 AEAD_State[34] | Perm_row_2[10] 8 +; 0x00 AEAD_State[35] | Perm_row_2[11] <<< 8 +; 0x00 AEAD_State[36] | Perm_row_3[ 0] 55 +; 0x00 AEAD_State[37] | Perm_row_3[ 1] 55 +; 0x00 AEAD_State[38] | Perm_row_3[ 2] 55 +; 0x00 AEAD_State[39] | Perm_row_3[ 3] 55 +; 0x00 AEAD_State[40] | Perm_row_3[ 4] 55 +; 0x00 AEAD_State[41] | Perm_row_3[ 5] 55 +; 0x00 AEAD_State[42] | Perm_row_3[ 6] 55 +; 0x00 AEAD_State[43] | Perm_row_3[ 7] 55 +; 0x00 AEAD_State[44] | Perm_row_3[ 8] 55 +; 0x00 AEAD_State[45] | Perm_row_3[ 9] 55 +; 0x00 AEAD_State[46] | Perm_row_3[10] 55 +; 0x00 ^0x80 AEAD_State[47] | ^0x80 Perm_row_3[11] <<< 55 diff --git a/knot/Implementations/crypto_hash/knot384/avr8_speed/hash.c b/knot/Implementations/crypto_hash/knot384/avr8_speed/hash.c new file mode 100644 index 0000000..dbbe4df --- /dev/null +++ b/knot/Implementations/crypto_hash/knot384/avr8_speed/hash.c @@ -0,0 +1,32 @@ +#include +#include +#include +#include +#include "api.h" +#include "crypto_hash.h" + +extern void crypto_hash_asm( + unsigned char *out, + const unsigned char *in, + unsigned char inlen + ); + +int crypto_hash( + unsigned char *out, + const unsigned char *in, + unsigned long long inlen +) +{ + /* + ... + ... 
the code for the hash function implementation goes here + ... generating a hash value out[0],out[1],...,out[CRYPTO_BYTES-1] + ... from a message in[0],in[1],...,in[in-1] + ... + ... return 0; + */ + + crypto_hash_asm(out, in, inlen); + + return 0; +} \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot384/avr8_speed/knot256.h b/knot/Implementations/crypto_hash/knot384/avr8_speed/knot256.h new file mode 100644 index 0000000..f99f68b --- /dev/null +++ b/knot/Implementations/crypto_hash/knot384/avr8_speed/knot256.h @@ -0,0 +1,306 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.1 2020 by KNOT Team * +; ********************************************** +; +#define x10 r0 +#define x11 r1 +#define x12 r2 +#define x13 r3 +#define x14 r4 +#define x15 r5 +#define x16 r6 +#define x17 r7 + +; an intentionally arrangement of registers to facilitate movw +#define x20 r8 +#define x21 r10 +#define x22 r12 +#define x23 r14 +#define x24 r9 +#define x25 r11 +#define x26 r13 +#define x27 r15 + +; an intentionally arrangement of registers to facilitate movw +#define x30 r16 +#define x35 r18 +#define x32 r20 +#define x37 r22 +#define x34 r17 +#define x31 r19 +#define x36 r21 +#define x33 r23 + +#define t0j r24 +#define t1j r25 +#define x0j r27 + +#include "assist.h" + +.macro Sbox i0, i1, i2, i3 + mov t0j, \i1 + com \i0 + and \i1, \i0 + eor \i1, \i2 + or \i2, t0j + eor \i0, \i3 + eor \i2, \i0 + eor t0j, \i3 + and \i0, \i1 + eor \i3, \i1 + eor \i0, t0j + and t0j, \i2 + eor \i1, t0j +.endm + +Permutation: + PUSH_CONFLICT + mov rcnt, rn + + ldi YH, hi8(SRAM_STATE + ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + ROW_INBYTES) + ld x10, Y+ + ld x11, Y+ + ld x12, Y+ + ld x13, Y+ + ld x14, Y+ + ld x15, Y+ + ld x16, Y+ + ld x17, Y+ + ld x20, Y+ + ld x21, Y+ + ld x22, Y+ + ld x23, Y+ + ld 
x24, Y+ + ld x25, Y+ + ld x26, Y+ + ld x27, Y+ + ld x30, Y+ + ld x31, Y+ + ld x32, Y+ + ld x33, Y+ + ld x34, Y+ + ld x35, Y+ + ld x36, Y+ + ld x37, Y+ + +#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) + sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[2] = 1 for HASH + rjmp For_Hash +For_AEAD: + ldi ZL, lo8(RC_LFSR6) + ldi ZH, hi8(RC_LFSR6) + rjmp round_loop_start +For_Hash: + ldi ZL, lo8(RC_LFSR7) + ldi ZH, hi8(RC_LFSR7) +#elif defined(CRYPTO_AEAD) + ldi ZL, lo8(RC_LFSR6) + ldi ZH, hi8(RC_LFSR6) +#else + ldi ZL, lo8(RC_LFSR7) + ldi ZH, hi8(RC_LFSR7) +#endif + +round_loop_start: + ; AddRC + lpm t0j, Z+ + ldi YH, hi8(SRAM_STATE) + ldi YL, lo8(SRAM_STATE) + ld x0j, Y + eor x0j, t0j + + ; SubColumns + Sbox x0j, x10, x20, x30 + st Y+, x0j + ld x0j, Y + Sbox x0j, x11, x21, x31 + st Y+, x0j + ld x0j, Y + Sbox x0j, x12, x22, x32 + st Y+, x0j + ld x0j, Y + Sbox x0j, x13, x23, x33 + st Y+, x0j + ld x0j, Y + Sbox x0j, x14, x24, x34 + st Y+, x0j + ld x0j, Y + Sbox x0j, x15, x25, x35 + st Y+, x0j + ld x0j, Y + Sbox x0j, x16, x26, x36 + st Y+, x0j + ld x0j, Y + Sbox x0j, x17, x27, x37 + st Y, x0j + + ; ShiftRows + ; <<< 1 + mov t0j, x17 + rol t0j + rol x10 + rol x11 + rol x12 + rol x13 + rol x14 + rol x15 + rol x16 + rol x17 + + ; <<< 8 + ; 7 6 5 4 3 2 1 0 => 6 5 4 3 2 1 0 7 + ;mov t0j, x27 + ;mov x27, x26 + ;mov x26, x25 + ;mov x25, x24 + ;mov x24, x23 + ;mov x23, x22 + ;mov x22, x21 + ;mov x21, x20 + ;mov x20, t0j + ; an intentionally arrangement of registers to facilitate movw + movw t0j, x23 ; t1j:t0j <= x27:x23 + movw x23, x22 ; x27:x23 <= x26:x22 + movw x22, x21 ; x26:x22 <= x25:x21 + movw x21, x20 ; x25:x21 <= x24:x20 + mov x20, t1j ; x20 <= t1j + mov x24, t0j ; x24 <= t0j + + ; <<< 1 + mov t0j, x37 + rol t0j + rol x30 + rol x31 + rol x32 + rol x33 + rol x34 + rol x35 + rol x36 + rol x37 + ; <<< 24 + ; 7 6 5 4 3 2 1 0 => 4 3 2 1 0 7 6 5 + ;mov t0j, x30 + ;mov x30, x35 + ;mov x35, x32 + ;mov x32, x37 + ;mov x37, x34 + ;mov x34, x31 + ;mov x31, x36 + ;mov x36, x33 + ;mov 
x33, t0j + ; an intentionally arrangement of registers to facilitate movw + ;x30 r16 + ;x35 r18 + ;x32 r20 + ;x37 r22 + ;x34 r17 + ;x31 r19 + ;x36 r21 + ;x33 r23 + movw t0j, x30 ; t1j:t0j <= x34:x30 + movw x30, x35 ; x34:x30 <= x31:x35 + movw x35, x32 ; x31:x35 <= x36:x32 + movw x32, x37 ; x36:x32 <= x33:x37 + mov x37, t1j ; x37 <= x34 + mov x33, t0j ; x33 <= x30 + + dec rcnt + breq round_loop_end + jmp round_loop_start + +round_loop_end: + ldi YH, hi8(SRAM_STATE + ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + ROW_INBYTES) + st Y+, x10 + st Y+, x11 + st Y+, x12 + st Y+, x13 + st Y+, x14 + st Y+, x15 + st Y+, x16 + st Y+, x17 + st Y+, x20 + st Y+, x21 + st Y+, x22 + st Y+, x23 + st Y+, x24 + st Y+, x25 + st Y+, x26 + st Y+, x27 + st Y+, x30 + st Y+, x31 + st Y+, x32 + st Y+, x33 + st Y+, x34 + st Y+, x35 + st Y+, x36 + st Y+, x37 + + POP_CONFLICT +ret + + +.section .text +#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) +RC_LFSR6: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x21, 0x03, 0x06 +.byte 0x0c, 0x18, 0x31, 0x22, 0x05, 0x0a, 0x14, 0x29 +.byte 0x13, 0x27, 0x0f, 0x1e, 0x3d, 0x3a, 0x34, 0x28 +.byte 0x11, 0x23, 0x07, 0x0e, 0x1c, 0x39, 0x32, 0x24 +.byte 0x09, 0x12, 0x25, 0x0b, 0x16, 0x2d, 0x1b, 0x37 +.byte 0x2e, 0x1d, 0x3b, 0x36, 0x2c, 0x19, 0x33, 0x26 +.byte 0x0d, 0x1a, 0x35, 0x2a, 0x15, 0x2b, 0x17, 0x2f +.byte 0x1f, 0x3f, 0x3e, 0x3c, 0x38, 0x30, 0x20, 0x00 +RC_LFSR7: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03 +.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a +.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c +.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b +.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a +.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f +.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43 +.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09 +.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36 +.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37 +.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31 +.byte 0x63, 0x46, 0x0d, 
0x1a, 0x34, 0x69, 0x52, 0x25 +.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c +.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a +.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f +.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00 +#elif defined(CRYPTO_AEAD) +RC_LFSR6: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x21, 0x03, 0x06 +.byte 0x0c, 0x18, 0x31, 0x22, 0x05, 0x0a, 0x14, 0x29 +.byte 0x13, 0x27, 0x0f, 0x1e, 0x3d, 0x3a, 0x34, 0x28 +.byte 0x11, 0x23, 0x07, 0x0e, 0x1c, 0x39, 0x32, 0x24 +.byte 0x09, 0x12, 0x25, 0x0b, 0x16, 0x2d, 0x1b, 0x37 +.byte 0x2e, 0x1d, 0x3b, 0x36, 0x2c, 0x19, 0x33, 0x26 +.byte 0x0d, 0x1a, 0x35, 0x2a, 0x15, 0x2b, 0x17, 0x2f +.byte 0x1f, 0x3f, 0x3e, 0x3c, 0x38, 0x30, 0x20, 0x00 +#else +RC_LFSR7: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03 +.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a +.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c +.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b +.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a +.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f +.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43 +.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09 +.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36 +.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37 +.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31 +.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25 +.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c +.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a +.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f +.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00 +#endif \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot384/avr8_speed/knot384.h b/knot/Implementations/crypto_hash/knot384/avr8_speed/knot384.h new file mode 100644 index 0000000..0b3dd75 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot384/avr8_speed/knot384.h @@ -0,0 +1,261 @@ +; +; ********************************************** +; * KNOT: a family of 
bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.1 2020 by KNOT Team * +; ********************************************** +; + +; an intentionally arrangement of registers to facilitate movw +#define x20 r0 +#define x21 r2 +#define x22 r4 +#define x23 r6 +#define x24 r8 +#define x25 r10 +#define x26 r1 +#define x27 r3 +#define x28 r5 +#define x29 r7 +#define x2a r9 +#define x2b r11 + +; an intentionally arrangement of registers to facilitate movw +#define x30 r22 +#define x35 r20 +#define x3a r18 +#define x33 r16 +#define x38 r14 +#define x31 r12 +#define x36 r23 +#define x3b r21 +#define x34 r19 +#define x39 r17 +#define x32 r15 +#define x37 r13 + +#define t0j r24 +#define t1j r25 +#define x0j r25 +#define x1j r27 + +#include "assist.h" + +.macro Sbox i0, i1, i2, i3 + ldi t0j, 0xFF + eor \i0, t0j + mov t0j, \i1 + and \i1, \i0 + eor \i1, \i2 + or \i2, t0j + eor \i0, \i3 + eor \i2, \i0 + eor t0j, \i3 + and \i0, \i1 + eor \i3, \i1 + eor \i0, t0j + and t0j, \i2 + eor \i1, t0j +.endm + +.macro OneColumn i0, i1, i2, i3 + ld \i0, Y + ldd \i1, Y + ROW_INBYTES + Sbox \i0, \i1, \i2, \i3 + st Y+, \i0 + rol \i1 ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES -1, \i1 +.endm + +Permutation: + PUSH_CONFLICT + mov rcnt, rn + + ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES) + ld x20, Y+ + ld x21, Y+ + ld x22, Y+ + ld x23, Y+ + ld x24, Y+ + ld x25, Y+ + ld x26, Y+ + ld x27, Y+ + ld x28, Y+ + ld x29, Y+ + ld x2a, Y+ + ld x2b, Y+ + ld x30, Y+ + ld x31, Y+ + ld x32, Y+ + ld x33, Y+ + ld x34, Y+ + ld x35, Y+ + ld x36, Y+ + ld x37, Y+ + ld x38, Y+ + ld x39, Y+ + ld x3a, Y+ + ld x3b, Y+ + + ldi ZL, lo8(RC_LFSR7) + ldi ZH, hi8(RC_LFSR7) + +round_loop_start: + ; AddRC + lpm t0j, Z+ + ldi YH, hi8(SRAM_STATE) + ldi YL, lo8(SRAM_STATE) + ld x0j, Y + eor x0j, t0j + + ldd x1j, Y + ROW_INBYTES + Sbox x0j, x1j, x20, x30 + st Y+, x0j + lsl x1j ; 
ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES -1, x1j + + OneColumn x0j, x1j, x21, x31 + OneColumn x0j, x1j, x22, x32 + OneColumn x0j, x1j, x23, x33 + OneColumn x0j, x1j, x24, x34 + OneColumn x0j, x1j, x25, x35 + OneColumn x0j, x1j, x26, x36 + OneColumn x0j, x1j, x27, x37 + OneColumn x0j, x1j, x28, x38 + OneColumn x0j, x1j, x29, x39 + OneColumn x0j, x1j, x2a, x3a + OneColumn x0j, x1j, x2b, x3b + + ld x1j, Y + eor t0j, t0j + adc x1j, t0j + st Y, x1j + + ; b a 9 8 7 6 5 4 3 2 1 0 + ; -- -- -- -- -- -- -- -- -- -- -- x- 0 + ; -- -- -- -- -- -- -- -- -- -- -- x' 0 + ; -- -- -- -- -- -- -- -- -- -- x- -- 1 + ; -- -- -- -- x' -- -- -- -- -- -- -- 7 + ; 4 3 2 1 0 b a 9 8 7 6 5 + + ; ShiftRows -- the last two rows + ; <<< 8 + ; b a 9 8 7 6 5 4 3 2 1 0 => a 9 8 7 6 5 4 3 2 1 0 b + movw t0j, x25 ; t1j:t0j <= x2b:x25 + movw x25, x24 ; x2b:x25 <= x2a:x24 + movw x24, x23 ; x2a:x24 <= x29:x23 + movw x23, x22 ; x29:x23 <= x28:x22 + movw x22, x21 ; x28:x22 <= x27:x21 + movw x21, x20 ; x27:x21 <= x26:x20 + mov x26, t0j ; x26 <= x25 + mov x20, t1j ; x20 <= x2b + + ; >>> 1 + mov t0j, x3b + ror t0j + ror x3a + ror x39 + ror x38 + ror x37 + ror x36 + ror x35 + ror x34 + ror x33 + ror x32 + ror x31 + ror x30 + ror x3b + ; <<< 56 + ; b a 9 8 7 6 5 4 3 2 1 0 => 4 3 2 1 0 b a 9 8 7 6 5 + ; mov x3j, x30 + ; mov x30, x35 + ; mov x35, x3a + ; mov x3a, x33 + ; mov x33, x38 + ; mov x38, x31 + ; mov x31, x36 + ; mov x36, x3b + ; mov x3b, x34 + ; mov x34, x39 + ; mov x39, x32 + ; mov x32, x37 + ; mov x37, x3j + ; an intentionally arrangement of registers to facilitate movw + ; x30 r22 + ; x35 r20 + ; x3a r18 + ; x33 r16 + ; x38 r14 + ; x31 r12 + ; x36 r23 + ; x3b r21 + ; x34 r19 + ; x39 r17 + ; x32 r15 + ; x37 r13 + movw t0j, x30 ; t1j:t0j <= x36:x30 + movw x30, x35 ; x36:x30 <= x3b:x35 + movw x35, x3a ; x3b:x35 <= x34:x3a + movw x3a, x33 ; x34:x3a <= x39:x33 + movw x33, x38 ; x39:x33 <= x32:x38 + movw x38, x31 ; x32:x38 <= x37:x31 + mov x31, t1j ; x31 <= x36 + mov x37, t0j ; x37 <= x30 + + dec 
rcnt + breq round_loop_end + jmp round_loop_start + +round_loop_end: + + ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES) + st Y+, x20 + st Y+, x21 + st Y+, x22 + st Y+, x23 + st Y+, x24 + st Y+, x25 + st Y+, x26 + st Y+, x27 + st Y+, x28 + st Y+, x29 + st Y+, x2a + st Y+, x2b + st Y+, x30 + st Y+, x31 + st Y+, x32 + st Y+, x33 + st Y+, x34 + st Y+, x35 + st Y+, x36 + st Y+, x37 + st Y+, x38 + st Y+, x39 + st Y+, x3a + st Y+, x3b + + POP_CONFLICT +ret + +RC_LFSR7: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03 +.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a +.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c +.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b +.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a +.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f +.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43 +.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09 +.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36 +.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37 +.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31 +.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25 +.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c +.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a +.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f +.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00 \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot384/avr8_speed/knot512.h b/knot/Implementations/crypto_hash/knot384/avr8_speed/knot512.h new file mode 100644 index 0000000..b0e4319 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot384/avr8_speed/knot512.h @@ -0,0 +1,435 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.1 2020 by KNOT Team * +; ********************************************** +; 
+#define x20 r0 +#define x22 r2 +#define x24 r4 +#define x26 r6 +#define x28 r1 +#define x2a r3 +#define x2c r5 +#define x2e r7 + +#define x30 r8 +#define x3d r10 +#define x3a r12 +#define x37 r14 +#define x34 r16 +#define x31 r18 +#define x3e r20 +#define x3b r22 +#define x38 r9 +#define x35 r11 +#define x32 r13 +#define x3f r15 +#define x3c r17 +#define x39 r19 +#define x36 r21 +#define x33 r23 + +#define t0j r24 +#define t1j r25 +#define x0j r25 +#define x1j r27 +#define x2j r26 + +#include "assist.h" + +.macro Sbox i0, i1, i2, i3 + ldi t0j, 0xFF + eor \i0, t0j + mov t0j, \i1 + and \i1, \i0 + eor \i1, \i2 + or \i2, t0j + eor \i0, \i3 + eor \i2, \i0 + eor t0j, \i3 + and \i0, \i1 + eor \i3, \i1 + eor \i0, t0j + and t0j, \i2 + eor \i1, t0j +.endm + +.macro TwoColumns i2_e, i3_e, i3_o + ; column 2i + ld x0j, Y + ldd x1j, Y + ROW_INBYTES + Sbox x0j, x1j, \i2_e, \i3_e + st Y+, x0j + rol x1j ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES - 1, x1j + + ; column 2i+1 + ld x0j, Y + ldd x1j, Y + ROW_INBYTES + Sbox x0j, x1j, x2j, \i3_o + st Y+, x0j + rol x1j ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES - 1, x1j + ldd t0j, Y + 2 * ROW_INBYTES + 1 + std Y + 2 * ROW_INBYTES + 1, x2j + mov x2j, t0j +.endm + +Permutation: + PUSH_CONFLICT + mov rcnt, rn + push rcnt + + ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES) + + ldd x20, Y + 0x00 + ldd x22, Y + 0x02 + ldd x24, Y + 0x04 + ldd x26, Y + 0x06 + ldd x28, Y + 0x08 + ldd x2a, Y + 0x0a + ldd x2c, Y + 0x0c + ldd x2e, Y + 0x0e + + adiw YL, ROW_INBYTES + + ld x30, Y+ + ld x31, Y+ + ld x32, Y+ + ld x33, Y+ + ld x34, Y+ + ld x35, Y+ + ld x36, Y+ + ld x37, Y+ + ld x38, Y+ + ld x39, Y+ + ld x3a, Y+ + ld x3b, Y+ + ld x3c, Y+ + ld x3d, Y+ + ld x3e, Y+ + ld x3f, Y+ + +#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) + sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[2] = 1 for HASH + rjmp For_Hash +For_AEAD: + ldi ZL, lo8(RC_LFSR7) + ldi ZH, hi8(RC_LFSR7) + rjmp round_loop_start +For_Hash: + ldi ZL, 
lo8(RC_LFSR8) + ldi ZH, hi8(RC_LFSR8) +#elif defined(CRYPTO_AEAD) + ldi ZL, lo8(RC_LFSR7) + ldi ZH, hi8(RC_LFSR7) +#else + ldi ZL, lo8(RC_LFSR8) + ldi ZH, hi8(RC_LFSR8) +#endif + + +round_loop_start: + ; AddRC + lpm t0j, Z+ + ldi YH, hi8(SRAM_STATE) + ldi YL, lo8(SRAM_STATE) + + ; column 0 + ld x0j, Y + eor x0j, t0j + ldd x1j, Y + ROW_INBYTES + Sbox x0j, x1j, x20, x30 + st Y+, x0j + lsl x1j ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES - 1, x1j + + ; column 1 + ld x0j, Y + ldd x1j, Y + ROW_INBYTES + ldd x2j, Y + 2 * ROW_INBYTES + Sbox x0j, x1j, x2j, x31 + st Y+, x0j + rol x1j ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES - 1, x1j + ldd t0j, Y + 2 * ROW_INBYTES + 1 + std Y + 2 * ROW_INBYTES + 1, x2j + mov x2j, t0j + + ; column 2, 3 + TwoColumns x22, x32, x33 + ; column 4, 5 + TwoColumns x24, x34, x35 + ; column 6, 7 + TwoColumns x26, x36, x37 + ; column 8, 9 + TwoColumns x28, x38, x39 + ; column 10, 11 + TwoColumns x2a, x3a, x3b + ; column 12, 13 + TwoColumns x2c, x3c, x3d + + ; column 14 + ld x0j, Y + ldd x1j, Y + ROW_INBYTES + Sbox x0j, x1j, x2e, x3e + st Y+, x0j + rol x1j ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES - 1, x1j + + ; column 15 + ld x0j, Y + ldd x1j, Y + ROW_INBYTES + Sbox x0j, x1j, x2j, x3f + st Y+, x0j + rol x1j ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES - 1, x1j + + ld x1j, Y + eor t0j, t0j + adc x1j, t0j + st Y, x1j + std Y + ROW_INBYTES + 1, x2j + + ; f e d c b a 9 8 7 6 5 4 3 2 1 0 + ; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x- 0 + ; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x' 0 + ; -- -- -- -- -- -- -- -- -- -- -- -- -- x- -- -- 2 + ; -- -- -- -- -- -- -- -- -- -- -- -- x' -- -- -- 3 + ; c b a 9 8 7 6 5 4 3 2 1 0 f e d + ; x2e x2c x2a x28 x26 x24 x22 x20 => x2c x2a x28 x26 x24 x22 x20 x2e + ;mov t0j, x2e + ;mov x2e, x2c + ;mov x2c, x2a + ;mov x2a, x28 + ;mov x28, x26 + ;mov x26, x24 + ;mov x24, x22 + ;mov x22, x20 + ;mov x20, t0j + ; an intentionally arrangement of registers to facilitate movw + movw t0j, x26 ; t1j:t0j 
<= x2e:x26 + movw x26, x24 ; x2e:x26 <= x2c:x24 + movw x24, x22 ; x2c:x24 <= x2a:x22 + movw x22, x20 ; x2a:x22 <= x28:x20 + mov x20, t1j ; x20 <= t1j + mov x28, t0j ; x28 <= t0j + + ; <<< 1 + mov t0j, x3f + rol t0j + rol x30 + rol x31 + rol x32 + rol x33 + rol x34 + rol x35 + rol x36 + rol x37 + rol x38 + rol x39 + rol x3a + rol x3b + rol x3c + rol x3d + rol x3e + rol x3f + ; <<< 24 + ; f e d c b a 9 8 7 6 5 4 3 2 1 0 => + ; c b a 9 8 7 6 5 4 3 2 1 0 f e d + ; mov x3j, x30 + ; mov x30, x3d + ; mov x3d, x3a + ; mov x3a, x37 + ; mov x37, x34 + ; mov x34, x31 + ; mov x31, x3e + ; mov x3e, x3b + ; mov x3b, x38 + ; mov x38, x35 + ; mov x35, x32 + ; mov x32, x3f + ; mov x3f, x3c + ; mov x3c, x39 + ; mov x39, x36 + ; mov x36, x33 + ; mov x33, x3j + ; an intentionally arrangement of registers to facilitate movw + ; x30 r8 + ; x3d r10 + ; x3a r12 + ; x37 r14 + ; x34 r16 + ; x31 r18 + ; x3e r20 + ; x3b r22 + ; x38 r9 + ; x35 r11 + ; x32 r13 + ; x3f r15 + ; x3c r17 + ; x39 r19 + ; x36 r21 + ; x33 r23 + movw t0j, x30 ; t1j:t0j <= x38:x30 + movw x30, x3d ; x38:x30 <= x35:x3d + movw x3d, x3a ; x35:x3d <= x32:x3a + movw x3a, x37 ; x32:x3a <= x3f:x37 + movw x37, x34 ; x3f:x37 <= x3c:x34 + movw x34, x31 ; x3c:x34 <= x39:x31 + movw x31, x3e ; x39:x31 <= x36:x3e + movw x3e, x3b ; x36:x3e <= x33:x3b + mov x3b, t1j ; x3b <= x38 + mov x33, t0j ; x33 <= x30 + + pop rcnt + dec rcnt + push rcnt + breq round_loop_end + rjmp round_loop_start + +round_loop_end: + pop rcnt + + ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES) + std Y + 0x00, x20 + std Y + 0x02, x22 + std Y + 0x04, x24 + std Y + 0x06, x26 + std Y + 0x08, x28 + std Y + 0x0a, x2a + std Y + 0x0c, x2c + std Y + 0x0e, x2e + adiw YL, ROW_INBYTES + st Y+, x30 + st Y+, x31 + st Y+, x32 + st Y+, x33 + st Y+, x34 + st Y+, x35 + st Y+, x36 + st Y+, x37 + st Y+, x38 + st Y+, x39 + st Y+, x3a + st Y+, x3b + st Y+, x3c + st Y+, x3d + st Y+, x3e + st Y+, x3f + + POP_CONFLICT +ret + +.section .text +#if 
defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) +RC_LFSR7: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03 +.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a +.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c +.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b +.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a +.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f +.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43 +.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09 +.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36 +.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37 +.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31 +.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25 +.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c +.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a +.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f +.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00 +RC_LFSR8: +.byte 0x01, 0x02, 0x04, 0x08, 0x11, 0x23, 0x47, 0x8e +.byte 0x1c, 0x38, 0x71, 0xe2, 0xc4, 0x89, 0x12, 0x25 +.byte 0x4b, 0x97, 0x2e, 0x5c, 0xb8, 0x70, 0xe0, 0xc0 +.byte 0x81, 0x03, 0x06, 0x0c, 0x19, 0x32, 0x64, 0xc9 +.byte 0x92, 0x24, 0x49, 0x93, 0x26, 0x4d, 0x9b, 0x37 +.byte 0x6e, 0xdc, 0xb9, 0x72, 0xe4, 0xc8, 0x90, 0x20 +.byte 0x41, 0x82, 0x05, 0x0a, 0x15, 0x2b, 0x56, 0xad +.byte 0x5b, 0xb6, 0x6d, 0xda, 0xb5, 0x6b, 0xd6, 0xac +.byte 0x59, 0xb2, 0x65, 0xcb, 0x96, 0x2c, 0x58, 0xb0 +.byte 0x61, 0xc3, 0x87, 0x0f, 0x1f, 0x3e, 0x7d, 0xfb +.byte 0xf6, 0xed, 0xdb, 0xb7, 0x6f, 0xde, 0xbd, 0x7a +.byte 0xf5, 0xeb, 0xd7, 0xae, 0x5d, 0xba, 0x74, 0xe8 +.byte 0xd1, 0xa2, 0x44, 0x88, 0x10, 0x21, 0x43, 0x86 +.byte 0x0d, 0x1b, 0x36, 0x6c, 0xd8, 0xb1, 0x63, 0xc7 +.byte 0x8f, 0x1e, 0x3c, 0x79, 0xf3, 0xe7, 0xce, 0x9c +.byte 0x39, 0x73, 0xe6, 0xcc, 0x98, 0x31, 0x62, 0xc5 +.byte 0x8b, 0x16, 0x2d, 0x5a, 0xb4, 0x69, 0xd2, 0xa4 +.byte 0x48, 0x91, 0x22, 0x45, 0x8a, 0x14, 0x29, 0x52 +.byte 0xa5, 0x4a, 0x95, 0x2a, 0x54, 0xa9, 0x53, 0xa7 +.byte 0x4e, 0x9d, 0x3b, 0x77, 0xee, 0xdd, 
0xbb, 0x76 +.byte 0xec, 0xd9, 0xb3, 0x67, 0xcf, 0x9e, 0x3d, 0x7b +.byte 0xf7, 0xef, 0xdf, 0xbf, 0x7e, 0xfd, 0xfa, 0xf4 +.byte 0xe9, 0xd3, 0xa6, 0x4c, 0x99, 0x33, 0x66, 0xcd +.byte 0x9a, 0x35, 0x6a, 0xd4, 0xa8, 0x51, 0xa3, 0x46 +.byte 0x8c, 0x18, 0x30, 0x60, 0xc1, 0x83, 0x07, 0x0e +.byte 0x1d, 0x3a, 0x75, 0xea, 0xd5, 0xaa, 0x55, 0xab +.byte 0x57, 0xaf, 0x5f, 0xbe, 0x7c, 0xf9, 0xf2, 0xe5 +.byte 0xca, 0x94, 0x28, 0x50, 0xa1, 0x42, 0x84, 0x09 +.byte 0x13, 0x27, 0x4f, 0x9f, 0x3f, 0x7f, 0xff, 0xfe +.byte 0xfc, 0xf8, 0xf0, 0xe1, 0xc2, 0x85, 0x0b, 0x17 +.byte 0x2f, 0x5e, 0xbc, 0x78, 0xf1, 0xe3, 0xc6, 0x8d +.byte 0x1a, 0x34, 0x68, 0xd0, 0xa0, 0x40, 0x80, 0x00 +#elif defined(CRYPTO_AEAD) +RC_LFSR7: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03 +.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a +.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c +.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b +.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a +.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f +.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43 +.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09 +.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36 +.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37 +.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31 +.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25 +.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c +.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a +.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f +.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00 +#else +RC_LFSR8: +.byte 0x01, 0x02, 0x04, 0x08, 0x11, 0x23, 0x47, 0x8e +.byte 0x1c, 0x38, 0x71, 0xe2, 0xc4, 0x89, 0x12, 0x25 +.byte 0x4b, 0x97, 0x2e, 0x5c, 0xb8, 0x70, 0xe0, 0xc0 +.byte 0x81, 0x03, 0x06, 0x0c, 0x19, 0x32, 0x64, 0xc9 +.byte 0x92, 0x24, 0x49, 0x93, 0x26, 0x4d, 0x9b, 0x37 +.byte 0x6e, 0xdc, 0xb9, 0x72, 0xe4, 0xc8, 0x90, 0x20 +.byte 0x41, 0x82, 0x05, 0x0a, 0x15, 0x2b, 0x56, 0xad +.byte 0x5b, 0xb6, 0x6d, 0xda, 0xb5, 
0x6b, 0xd6, 0xac +.byte 0x59, 0xb2, 0x65, 0xcb, 0x96, 0x2c, 0x58, 0xb0 +.byte 0x61, 0xc3, 0x87, 0x0f, 0x1f, 0x3e, 0x7d, 0xfb +.byte 0xf6, 0xed, 0xdb, 0xb7, 0x6f, 0xde, 0xbd, 0x7a +.byte 0xf5, 0xeb, 0xd7, 0xae, 0x5d, 0xba, 0x74, 0xe8 +.byte 0xd1, 0xa2, 0x44, 0x88, 0x10, 0x21, 0x43, 0x86 +.byte 0x0d, 0x1b, 0x36, 0x6c, 0xd8, 0xb1, 0x63, 0xc7 +.byte 0x8f, 0x1e, 0x3c, 0x79, 0xf3, 0xe7, 0xce, 0x9c +.byte 0x39, 0x73, 0xe6, 0xcc, 0x98, 0x31, 0x62, 0xc5 +.byte 0x8b, 0x16, 0x2d, 0x5a, 0xb4, 0x69, 0xd2, 0xa4 +.byte 0x48, 0x91, 0x22, 0x45, 0x8a, 0x14, 0x29, 0x52 +.byte 0xa5, 0x4a, 0x95, 0x2a, 0x54, 0xa9, 0x53, 0xa7 +.byte 0x4e, 0x9d, 0x3b, 0x77, 0xee, 0xdd, 0xbb, 0x76 +.byte 0xec, 0xd9, 0xb3, 0x67, 0xcf, 0x9e, 0x3d, 0x7b +.byte 0xf7, 0xef, 0xdf, 0xbf, 0x7e, 0xfd, 0xfa, 0xf4 +.byte 0xe9, 0xd3, 0xa6, 0x4c, 0x99, 0x33, 0x66, 0xcd +.byte 0x9a, 0x35, 0x6a, 0xd4, 0xa8, 0x51, 0xa3, 0x46 +.byte 0x8c, 0x18, 0x30, 0x60, 0xc1, 0x83, 0x07, 0x0e +.byte 0x1d, 0x3a, 0x75, 0xea, 0xd5, 0xaa, 0x55, 0xab +.byte 0x57, 0xaf, 0x5f, 0xbe, 0x7c, 0xf9, 0xf2, 0xe5 +.byte 0xca, 0x94, 0x28, 0x50, 0xa1, 0x42, 0x84, 0x09 +.byte 0x13, 0x27, 0x4f, 0x9f, 0x3f, 0x7f, 0xff, 0xfe +.byte 0xfc, 0xf8, 0xf0, 0xe1, 0xc2, 0x85, 0x0b, 0x17 +.byte 0x2f, 0x5e, 0xbc, 0x78, 0xf1, 0xe3, 0xc6, 0x8d +.byte 0x1a, 0x34, 0x68, 0xd0, 0xa0, 0x40, 0x80, 0x00 +#endif \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot384/avr8_speed/permutation.h b/knot/Implementations/crypto_hash/knot384/avr8_speed/permutation.h new file mode 100644 index 0000000..e6c9793 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot384/avr8_speed/permutation.h @@ -0,0 +1,45 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.1 2020 by KNOT Team * +; ********************************************** +; + +; +; 
============================================ +; R E G I S T E R D E F I N I T I O N S +; ============================================ +; + +#define mclen r16 +#define radlen r17 +#define tcnt r17 +#define tmp0 r20 +#define tmp1 r21 +#define cnt0 r22 +#define rn r23 +#define rate r24 + + +; AEDH = 0b000: for authenticate AD +; AEDH = 0b001: for encryption +; AEDH = 0b011: for decryption +; AEDH = 0b100: for hash +#define AEDH r25 +#define rcnt r26 + +#if (STATE_INBITS==256) +#include "knot256.h" +#elif (STATE_INBITS==384) +#include "knot384.h" +#elif (STATE_INBITS==512) +#include "knot512.h" +#else +#error "Not specified key size and state size" +#endif + + diff --git a/knot/Implementations/crypto_hash/knot512/avr8_speed/api.h b/knot/Implementations/crypto_hash/knot512/avr8_speed/api.h new file mode 100644 index 0000000..a46499d --- /dev/null +++ b/knot/Implementations/crypto_hash/knot512/avr8_speed/api.h @@ -0,0 +1 @@ +#define CRYPTO_BYTES 64 \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot512/avr8_speed/assist.h b/knot/Implementations/crypto_hash/knot512/avr8_speed/assist.h new file mode 100644 index 0000000..f95a717 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot512/avr8_speed/assist.h @@ -0,0 +1,86 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.1 2020 by KNOT Team * +; ********************************************** +; +.macro PUSH_CONFLICT + push r16 + push r17 + push r18 + push r19 + + push r23 + push r24 + push r25 + push r26 + push r27 + push r28 + push r29 + push r30 + push r31 +.endm + +.macro POP_CONFLICT + pop r31 + pop r30 + pop r29 + pop r28 + pop r27 + pop r26 + pop r25 + pop r24 + pop r23 + + pop r19 + pop r18 + pop r17 + pop r16 +.endm + +.macro PUSH_ALL + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + 
push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r28 + push r29 +.endm + +.macro POP_ALL + pop r29 + pop r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + clr r1 +.endm \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot512/avr8_speed/config.h b/knot/Implementations/crypto_hash/knot512/avr8_speed/config.h new file mode 100644 index 0000000..70fa8d6 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot512/avr8_speed/config.h @@ -0,0 +1,131 @@ +#ifndef __CONFIG_H__ +#define __CONFIG_H__ + +//#define CRYPTO_AEAD +#define CRYPTO_HASH + +#define MAX_MESSAGE_LENGTH 128 + +#define STATE_INBITS 512 +/* For CRYPTO_AEAD */ +#define CRYPTO_KEYBITS 256 +/* For CRYPTO_HASH */ +#define CRYPTO_BITS 512 + +#define STATE_INBYTES ((STATE_INBITS + 7) / 8) +#define ROW_INBITS ((STATE_INBITS + 3) / 4) +#define ROW_INBYTES ((ROW_INBITS + 7) / 8) + +/* For CRYPTO_AEAD */ +#define CRYPTO_KEYBYTES ((CRYPTO_KEYBITS + 7) / 8) +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES CRYPTO_KEYBYTES +#define CRYPTO_ABYTES CRYPTO_KEYBYTES +#define CRYPTO_NOOVERLAP 1 + +#define MAX_ASSOCIATED_DATA_LENGTH 32 +#define MAX_CIPHER_LENGTH (MAX_MESSAGE_LENGTH + CRYPTO_ABYTES) + +#define TAG_MATCH 0 +#define TAG_UNMATCH -1 +#define OTHER_FAILURES -2 + +/* For CRYPTO_HASH */ +#define CRYPTO_BYTES ((CRYPTO_BITS + 7) / 8) + + + +#define DOMAIN_BITS 0x80 +#define PAD_BITS 0x01 +#define S384_R192_BITS 0x80 + +#if (STATE_INBITS==256) +#define C1 1 +#define C2 8 +#define C3 25 +#elif (STATE_INBITS==384) +#define C1 1 +#define C2 8 +#define C3 55 +#elif (STATE_INBITS==512) +#define C1 1 +#define C2 16 +#define C3 25 +#else +#error "Not specified state size" +#endif + +#ifdef CRYPTO_AEAD +/* For CRYPTO_AEAD */ +#define KEY_INBITS (CRYPTO_KEYBYTES * 8) +#define KEY_INBYTES (CRYPTO_KEYBYTES) + +#define NONCE_INBITS 
(CRYPTO_NPUBBYTES * 8) +#define NONCE_INBYTES (CRYPTO_NPUBBYTES) + +#define TAG_INBITS (CRYPTO_ABYTES * 8) +#define TAG_INBYTES (CRYPTO_ABYTES) + +#if (KEY_INBITS==128) && (STATE_INBITS==256) +#define RATE_INBITS 64 +#define NR_0 52 +#define NR_i 28 +#define NR_f 32 +#elif (KEY_INBITS==128) && (STATE_INBITS==384) +#define RATE_INBITS 192 +#define NR_0 76 +#define NR_i 28 +#define NR_f 32 +#elif (KEY_INBITS==192) && (STATE_INBITS==384) +#define RATE_INBITS 96 +#define NR_0 76 +#define NR_i 40 +#define NR_f 44 +#elif (KEY_INBITS==256) && (STATE_INBITS==512) +#define RATE_INBITS 128 +#define NR_0 100 +#define NR_i 52 +#define NR_f 56 +#else +#error "Not specified key size and state size" +#endif + +#define RATE_INBYTES ((RATE_INBITS + 7) / 8) +#define SQUEEZE_RATE_INBYTES TAG_INBYTES + +#endif + +#ifdef CRYPTO_HASH +/* For CRYPTO_HASH */ +#define HASH_DIGEST_INBITS (CRYPTO_BYTES * 8) + +#if (HASH_DIGEST_INBITS==256) && (STATE_INBITS==256) +#define HASH_RATE_INBITS 32 +#define HASH_SQUEEZE_RATE_INBITS 128 +#define NR_h 68 +#elif (HASH_DIGEST_INBITS==256) && (STATE_INBITS==384) +#define HASH_RATE_INBITS 128 +#define HASH_SQUEEZE_RATE_INBITS 128 +#define NR_h 80 +#elif (HASH_DIGEST_INBITS==384) && (STATE_INBITS==384) +#define HASH_RATE_INBITS 48 +#define HASH_SQUEEZE_RATE_INBITS 192 +#define NR_h 104 +#elif (HASH_DIGEST_INBITS==512) && (STATE_INBITS==512) +#define HASH_RATE_INBITS 64 +#define HASH_SQUEEZE_RATE_INBITS 256 +#define NR_h 140 +#else +#error "Not specified hash digest size and state size" +#endif + +#define HASH_RATE_INBYTES ((HASH_RATE_INBITS + 7) / 8) +#define HASH_SQUEEZE_RATE_INBYTES ((HASH_SQUEEZE_RATE_INBITS + 7) / 8) + +#endif + +#define TAG_MATCH 0 +#define TAG_UNMATCH -1 +#define OTHER_FAILURES -2 + +#endif \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot512/avr8_speed/crypto_hash.h b/knot/Implementations/crypto_hash/knot512/avr8_speed/crypto_hash.h new file mode 100644 index 0000000..342a639 --- /dev/null +++ 
b/knot/Implementations/crypto_hash/knot512/avr8_speed/crypto_hash.h @@ -0,0 +1,13 @@ +#ifdef __cplusplus +extern "C" { +#endif + +int crypto_hash( + unsigned char *out, + const unsigned char *in, + unsigned long long inlen + ); + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot512/avr8_speed/encrypt.c b/knot/Implementations/crypto_hash/knot512/avr8_speed/encrypt.c new file mode 100644 index 0000000..baf0a3b --- /dev/null +++ b/knot/Implementations/crypto_hash/knot512/avr8_speed/encrypt.c @@ -0,0 +1,106 @@ +#include +#include +#include +#include +#include "config.h" + +extern void crypto_aead_encrypt_asm( + unsigned char *c, + const unsigned char *m, + unsigned char mlen, + const unsigned char *ad, + unsigned char adlen, + const unsigned char *npub, + const unsigned char *k + ); + +extern int crypto_aead_decrypt_asm( + unsigned char *m, + const unsigned char *c, + unsigned char clen, + const unsigned char *ad, + unsigned char adlen, + const unsigned char *npub, + const unsigned char *k + ); + +extern void crypto_hash_asm( + unsigned char *out, + const unsigned char *in, + unsigned char inlen + ); + + +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k + ) +{ + /* + ... + ... the code for the cipher implementation goes here, + ... generating a ciphertext c[0],c[1],...,c[*clen-1] + ... from a plaintext m[0],m[1],...,m[mlen-1] + ... and associated data ad[0],ad[1],...,ad[adlen-1] + ... and nonce npub[0],npub[1],.. + ... and secret key k[0],k[1],... + ... the implementation shall not use nsec + ... + ... 
return 0; + */ + + (void)nsec; + + crypto_aead_encrypt_asm(c, m, mlen, ad, adlen, npub, k); + + *clen = mlen + TAG_INBYTES; + return 0; +} + + + +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k + ) +{ + /* + ... + ... the code for the AEAD implementation goes here, + ... generating a plaintext m[0],m[1],...,m[*mlen-1] + ... and secret message number nsec[0],nsec[1],... + ... from a ciphertext c[0],c[1],...,c[clen-1] + ... and associated data ad[0],ad[1],...,ad[adlen-1] + ... and nonce number npub[0],npub[1],... + ... and secret key k[0],k[1],... + ... + ... return 0; + */ + unsigned long long mlen_; + unsigned char tag_is_match; + + (void)nsec; + if (clen < CRYPTO_ABYTES) { + return -1; + } + mlen_ = clen - CRYPTO_ABYTES; + + tag_is_match = crypto_aead_decrypt_asm(m, c, mlen_, ad, adlen, npub, k); + + if (tag_is_match != 0) + { + memset(m, 0, (size_t)mlen_); + return -1; + } + + *mlen = mlen_; + return 0; +} \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot512/avr8_speed/encrypt_core.S b/knot/Implementations/crypto_hash/knot512/avr8_speed/encrypt_core.S new file mode 100644 index 0000000..bd74f93 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot512/avr8_speed/encrypt_core.S @@ -0,0 +1,555 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.1 2020 by KNOT Team * +; ********************************************** +; + +; +; ============================================ +; S R A M D E F I N I T I O N S +; ============================================ +; +#include +#include "config.h" + +.section .noinit + SRAM_STATE: .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + 
.BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#if (STATE_INBYTES > 32) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif +#if (STATE_INBYTES > 48) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif + SRAM_MESSAGE_OUT_ADDR: .BYTE 0, 0 + SRAM_MESSAGE_IN_ADDR: .BYTE 0, 0 + SRAM_MESSAGE_IN_LEN: .BYTE 0, 0 +#ifdef CRYPTO_AEAD +; For CRYPTO_AEAD + SRAM_ASSOCIATED_DATA_ADDR: .BYTE 0, 0 + SRAM_ADLEN: .BYTE 0, 0 + SRAM_NONCE_ADDR: .BYTE 0, 0 + SRAM_KEY_ADDR: .BYTE 0, 0 + + SRAM_ADDITIONAL: + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#if (CRYPTO_ABYTES > 16) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif +#if (CRYPTO_ABYTES > 24) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif + +#endif + +.section .text + +#include "permutation.h" + +; require YH:YL be the address of the current associated data/cipher/message block +; for enc and dec, store ciphertext or plaintext +; require ZH:ZL be the address of the current cipher/message block +.macro XOR_to_State_ENCDEC + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + mov cnt0, rate +XOR_to_State_loop_ENCDEC: + ld tmp0, Y+ ; plaintext/ciphertext + ld tmp1, X ; state + eor tmp1, tmp0 ; ciphertext/plaintext + st Z+, tmp1 ; store ciphertext/plaintext + sbrc AEDH, 1 ; test auth/enc or dec, if AEDH[1] == 0, skip repalce state byte + mov tmp1, tmp0 ; if dec, replace state + st X+, tmp1 ; store state byte + dec cnt0 + brne XOR_to_State_loop_ENCDEC +; YH:YL are now the address of the next associated data block +.endm + +; require YH:YL be the address of the current associated data/cipher/message block +; for enc and dec, store ciphertext or plaintext +; require ZH:ZL be the address of the current cipher/message block +.macro XOR_to_State_AUTH + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + mov cnt0, rate +XOR_to_State_loop_AUTH: + ld tmp0, Y+ ; plaintext/ciphertext + ld tmp1, X ; state + eor tmp1, tmp0 ; ciphertext/plaintext + st X+, tmp1 ; store 
state byte + dec cnt0 + brne XOR_to_State_loop_AUTH +; YH:YL are now the address of the next associated data block +.endm + + + +; require YH:YL pointed to the input data +; require ZH:ZL pointed to the output data +; require cnt0 containes the nubmer of bytes in source data +; require number of bytes in source data less than rate, i.e., 0 <= cnt0 < rate +; +; the 0th bit in AEDH is used to distinguish (auth AD) or (enc/dec M/C): +; AEDH[0] = 0 for (auth AD), AEDH[0] = 1 for (enc/dec M/C) +; the 1th bit in AEDH is used to distinguish (auth AD/enc M) or (dec C): +; AEDH[1] = 0 for (auth AD/enc M), AEDH[1] = 1 for (dec C) +; AEDH = 0b000 for (auth AD) +; AEDH = 0b001 for (enc M) +; AEDH = 0b011 for (dec C) +Pad_XOR_to_State: + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + tst cnt0 + breq XOR_padded_data +XOR_source_data_loop: + ld tmp0, Y+ ; plaintext/ciphertext + ld tmp1, X ; state + eor tmp1, tmp0 ; ciphertext/plaintext + sbrc AEDH, 0 ; test auth or enc/dec, if AEDH[0] == 0, skip store result + st Z+, tmp1 ; store ciphertext/plaintext + sbrc AEDH, 1 ; test auth/enc or dec, if AEDH[1] == 0, skip repalce state byte + mov tmp1, tmp0 ; if dec, replace state + st X+, tmp1 ; store state byte + dec cnt0 + brne XOR_source_data_loop +XOR_padded_data: + ldi tmp0, PAD_BITS + ld tmp1, X + eor tmp1, tmp0 + st X, tmp1 +ret + +AddDomain: + ldi XH, hi8(SRAM_STATE + STATE_INBYTES - 1) + ldi XL, lo8(SRAM_STATE + STATE_INBYTES - 1) + ldi tmp0, DOMAIN_BITS + ld tmp1, X + eor tmp0, tmp1 + st X, tmp0 +ret + +; require ZH:ZL be the address of the destination +EXTRACT_from_State: + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + mov tmp1, rate +EXTRACT_from_State_loop: + ld tmp0, X+ + st Z+, tmp0 + dec tmp1 + brne EXTRACT_from_State_loop +ret + +AUTH: + tst radlen + breq AUTH_end + + cp radlen, rate + brlo auth_ad_padded_block + +auth_ad_loop: + XOR_to_State_AUTH + rcall Permutation + sub radlen, rate + cp radlen, rate + brlo auth_ad_padded_block + rjmp auth_ad_loop + 
+auth_ad_padded_block: + mov cnt0, radlen + rcall Pad_XOR_to_State + rcall Permutation + +AUTH_end: +ret + +#ifdef CRYPTO_AEAD +Initialization: + ldi rn, NR_0 + ldi XL, lo8(SRAM_STATE) + ldi XH, hi8(SRAM_STATE) + + lds YH, SRAM_NONCE_ADDR + lds YL, SRAM_NONCE_ADDR + 1 + ldi cnt0, CRYPTO_NPUBBYTES +load_nonce_loop: + ld tmp0, Y+ + st X+, tmp0 + dec cnt0 + brne load_nonce_loop + + lds YH, SRAM_KEY_ADDR + lds YL, SRAM_KEY_ADDR + 1 + ldi cnt0, CRYPTO_KEYBYTES +load_key_loop: + ld tmp0, Y+ + st X+, tmp0 + dec cnt0 + brne load_key_loop + +#if (STATE_INBITS==384) && (RATE_INBITS==192) + ldi cnt0, (STATE_INBYTES - CRYPTO_NPUBBYTES - CRYPTO_KEYBYTES - 1) + clr tmp0 +empty_state_loop: + st X+, tmp0 + dec cnt0 + brne empty_state_loop + ldi tmp0, S384_R192_BITS + st X+, tmp0 +#endif + + rcall Permutation +ret + +ENC: + tst mclen + breq ENC_end + + cp mclen, rate + brlo enc_padded_block + +enc_loop: + XOR_to_State_ENCDEC + ldi rn, NR_i + rcall Permutation + sub mclen, rate + cp mclen, rate + brlo enc_padded_block + rjmp enc_loop + +enc_padded_block: + mov cnt0, mclen + rcall Pad_XOR_to_State +ENC_end: +ret + +Finalization: + ldi rate, SQUEEZE_RATE_INBYTES + ldi rn, NR_f + rcall Permutation + rcall EXTRACT_from_State +ret + +; void crypto_aead_encrypt_asm( +; unsigned char *c, +; const unsigned char *m, +; unsigned long long mlen, +; const unsigned char *ad, +; unsigned long long adlen, +; const unsigned char *npub, +; const unsigned char *k +; ) +; +; unsigned char *c, is passed in r24:r25 +; const unsigned char *m, is passed in r22:r23 +; unsigned long long mlen, is passed in r20:r21, only LSB (r20) is used +; const unsigned char *ad, is passed in r18:r19 +; unsigned long long adlen, is passed in r16:r17, only LSB (r16) is used +; const unsigned char *npub, is passed in r14:r15 +; const unsigned char *k is passed in r12:r13 +.global crypto_aead_encrypt_asm +crypto_aead_encrypt_asm: + PUSH_ALL + ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR) + ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR) + st X+, 
r25 ;store cipher address in SRAM_MESSAGE_OUT_ADDR + st X+, r24 + st X+, r23 ;store message address in SRAM_MESSAGE_IN_ADDR + st X+, r22 + st X+, r21 ;store message length in SRAM_MESSAGE_IN_LEN + st X+, r20 + st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR + st X+, r18 + st X+, r17 ;store associated data length in SRAM_ADLEN + st X+, r16 + st X+, r15 ;store nonce address in SRAM_NONCE_ADDR + st X+, r14 + st X+, r13 ;store key address in SRAM_KEY_ADDR + st X+, r12 + mov radlen, r16 + mov mclen, r20 + + rcall Initialization + + ldi rn, NR_i + ldi rate, RATE_INBYTES + ldi AEDH, 0b000 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C) + lds YH, SRAM_ASSOCIATED_DATA_ADDR + lds YL, SRAM_ASSOCIATED_DATA_ADDR + 1 + rcall AUTH + rcall AddDomain + ldi AEDH, 0b001 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C) + lds YH, SRAM_MESSAGE_IN_ADDR + lds YL, SRAM_MESSAGE_IN_ADDR + 1 + lds ZH, SRAM_MESSAGE_OUT_ADDR + lds ZL, SRAM_MESSAGE_OUT_ADDR + 1 + rcall ENC + rcall Finalization + POP_ALL +ret + +; int crypto_aead_decrypt_asm( +; unsigned char *m, +; const unsigned char *c, +; unsigned long long clen, +; const unsigned char *ad, +; unsigned long long adlen, +; const unsigned char *npub, +; const unsigned char *k +; ) +; +; unsigned char *m, is passed in r24:r25 +; const unsigned char *c, is passed in r22:r23 +; unsigned long long clen, is passed in r20:r21, only LSB (r20) is used +; const unsigned char *ad, is passed in r18:r19 +; unsigned long long adlen, is passed in r16:r17, only LSB (r16) is used +; const unsigned char *npub, is passed in r14:r15 +; const unsigned char *k is passed in r12:r13 +.global crypto_aead_decrypt_asm +crypto_aead_decrypt_asm: + PUSH_ALL + ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR) + ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR) + st X+, r25 ;store message address in SRAM_MESSAGE_OUT_ADDR + st X+, r24 + st X+, r23 ;store cipher address in SRAM_MESSAGE_IN_ADDR + st X+, r22 + st X+, r21 
;store cipher length in SRAM_MESSAGE_IN_LEN + st X+, r20 + st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR + st X+, r18 + st X+, r17 ;store associated data length in SRAM_ADLEN + st X+, r16 + st X+, r15 ;store nonce address in SRAM_NONCE_ADDR + st X+, r14 + st X+, r13 ;store key address in SRAM_KEY_ADDR + st X+, r12 + mov radlen, r16 + mov mclen, r20 + + rcall Initialization + + ldi rn, NR_i + ldi rate, RATE_INBYTES + ldi AEDH, 0b000 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C) + lds YH, SRAM_ASSOCIATED_DATA_ADDR + lds YL, SRAM_ASSOCIATED_DATA_ADDR + 1 + rcall AUTH + rcall AddDomain + ldi AEDH, 0b011 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C) + lds YH, SRAM_MESSAGE_IN_ADDR + lds YL, SRAM_MESSAGE_IN_ADDR + 1 + lds ZH, SRAM_MESSAGE_OUT_ADDR + lds ZL, SRAM_MESSAGE_OUT_ADDR + 1 + rcall ENC + + ldi ZH, hi8(SRAM_ADDITIONAL) + ldi ZL, lo8(SRAM_ADDITIONAL) + rcall Finalization + + sbiw ZL, CRYPTO_ABYTES + ldi cnt0, CRYPTO_ABYTES +compare_tag: + ld tmp0, Z+ + ld tmp1, Y+ + cp tmp0, tmp1 + brne return_tag_not_match + dec cnt0 + brne compare_tag + rjmp return_tag_match + +return_tag_not_match: + ldi r25, 0xFF + ldi r24, 0xFF + rjmp crypto_aead_decrypt_end + +return_tag_match: + clr r25 + clr r24 +crypto_aead_decrypt_end: + POP_ALL +ret + +; #ifdef CRYPTO_AEAD +#endif + + +#ifdef CRYPTO_HASH + +; void crypto_hash_asm( +; unsigned char *out, +; const unsigned char *in, +; unsigned long long inlen +; ) +; +; unsigned char *out, is passed in r24:r25 +; const unsigned char *in, is passed in r22:r23 +; unsigned long long inlen, is passed in r20:r21, only LSB (r20) is used +.global crypto_hash_asm +crypto_hash_asm: + PUSH_ALL + ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR) + ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR) + st X+, r25 ;store message address in SRAM_MESSAGE_OUT_ADDR + st X+, r24 + st X+, r23 ;store cipher address in SRAM_MESSAGE_IN_ADDR + st X+, r22 + st X+, r21 ;store cipher length in 
SRAM_MESSAGE_IN_LEN + st X+, r20 + mov mclen, r20 + + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) +#if (STATE_INBITS==384) && (HASH_RATE_INBITS==128) + ldi cnt0, STATE_INBYTES - 1 +#else + ldi cnt0, STATE_INBYTES +#endif + clr tmp0 +zero_state: + st X+, tmp0 + dec cnt0 + brne zero_state + +#if (STATE_INBITS==384) && (HASH_RATE_INBITS==128) + ldi tmp0, S384_R192_BITS + st X+, tmp0 +#endif + + ldi rn, NR_h + ldi AEDH, 0b100 + +HASH_ABSORBING: + mov radlen, mclen + tst radlen + breq EMPTY_M + ldi rate, HASH_RATE_INBYTES + lds YH, SRAM_MESSAGE_IN_ADDR + lds YL, SRAM_MESSAGE_IN_ADDR + 1 + rcall AUTH + rjmp HASH_SQUEEZING + +EMPTY_M: + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + ldi tmp0, PAD_BITS + ld tmp1, X + eor tmp1, tmp0 + st X, tmp1 + rcall Permutation + +HASH_SQUEEZING: + ldi rate, HASH_SQUEEZE_RATE_INBYTES + lds ZH, SRAM_MESSAGE_OUT_ADDR + lds ZL, SRAM_MESSAGE_OUT_ADDR + 1 + ldi tcnt, CRYPTO_BYTES +SQUEEZING_loop: + rcall EXTRACT_from_State + subi tcnt, HASH_SQUEEZE_RATE_INBYTES + breq HASH_SQUEEZING_end + rcall Permutation + rjmp SQUEEZING_loop +HASH_SQUEEZING_end: + POP_ALL +ret + +#endif + + +; Byte Order In AVR 8: +; KNOT-AEAD(128, 256, 64): +; N[ 0] AEAD_State[ 0] | Message[ 0] Perm_row_0[0] 0 Tag[ 0] +; N[ 1] AEAD_State[ 1] | Message[ 1] Perm_row_0[1] 0 Tag[ 1] +; N[ 2] AEAD_State[ 2] | Message[ 2] Perm_row_0[2] 0 Tag[ 2] +; N[ 3] AEAD_State[ 3] | Message[ 3] Perm_row_0[3] 0 Tag[ 3] +; N[ 4] AEAD_State[ 4] | Message[ 4] 0x01 Perm_row_0[4] 0 Tag[ 4] +; N[ 5] AEAD_State[ 5] | Message[ 5] 0x00 Perm_row_0[5] 0 Tag[ 5] +; N[ 6] AEAD_State[ 6] | Message[ 6] 0x00 Perm_row_0[6] 0 Tag[ 6] +; N[ 7] AEAD_State[ 7] | Message[ 7] 0x00 Perm_row_0[7] <<< 0 Tag[ 7] +; N[ 8] AEAD_State[ 8] | Perm_row_1[0] 1 +; N[ 9] AEAD_State[ 9] | Perm_row_1[1] 1 +; N[10] AEAD_State[10] | Perm_row_1[2] 1 +; N[11] AEAD_State[11] | Perm_row_1[3] 1 +; N[12] AEAD_State[12] | Perm_row_1[4] 1 +; N[13] AEAD_State[13] | Perm_row_1[5] 1 +; N[14] AEAD_State[14] | Perm_row_1[6] 1 +; 
N[15] AEAD_State[15] | Perm_row_1[7] <<< 1 +; K[ 0] AEAD_State[16] | Perm_row_2[0] 8 +; K[ 1] AEAD_State[17] | Perm_row_2[1] 8 +; K[ 2] AEAD_State[18] | Perm_row_2[2] 8 +; K[ 3] AEAD_State[19] | Perm_row_2[3] 8 +; K[ 4] AEAD_State[20] | Perm_row_2[4] 8 +; K[ 5] AEAD_State[21] | Perm_row_2[5] 8 +; K[ 6] AEAD_State[22] | Perm_row_2[6] 8 +; K[ 7] AEAD_State[23] | Perm_row_2[7] <<< 8 +; K[ 8] AEAD_State[24] | Perm_row_3[0] 25 +; K[ 9] AEAD_State[25] | Perm_row_3[1] 25 +; K[10] AEAD_State[26] | Perm_row_3[2] 25 +; K[11] AEAD_State[27] | Perm_row_3[3] 25 +; K[12] AEAD_State[28] | Perm_row_3[4] 25 +; K[13] AEAD_State[29] | Perm_row_3[5] 25 +; K[14] AEAD_State[30] | Perm_row_3[6] 25 +; K[15] AEAD_State[31] | ^0x80 Perm_row_3[7] <<< 25 +; +; +; KNOT-AEAD(128, 384, 192): +; Initalization +; N[ 0] AEAD_State[ 0] | Message[ 0] Perm_row_0[ 0] 0 Tag[ 0] +; N[ 1] AEAD_State[ 1] | Message[ 1] Perm_row_0[ 1] 0 Tag[ 1] +; N[ 2] AEAD_State[ 2] | Message[ 2] Perm_row_0[ 2] 0 Tag[ 2] +; N[ 3] AEAD_State[ 3] | Message[ 3] Perm_row_0[ 3] 0 Tag[ 3] +; N[ 4] AEAD_State[ 4] | Message[ 4] 0x01 Perm_row_0[ 4] 0 Tag[ 4] +; N[ 5] AEAD_State[ 5] | Message[ 5] 0x00 Perm_row_0[ 5] 0 Tag[ 5] +; N[ 6] AEAD_State[ 6] | Message[ 6] 0x00 Perm_row_0[ 6] 0 Tag[ 6] +; N[ 7] AEAD_State[ 7] | Message[ 7] 0x00 Perm_row_0[ 7] 0 Tag[ 7] +; N[ 8] AEAD_State[ 8] | Message[ 8] 0x00 Perm_row_0[ 8] 0 Tag[ 8] +; N[ 9] AEAD_State[ 9] | Message[ 9] 0x00 Perm_row_0[ 9] 0 Tag[ 9] +; N[10] AEAD_State[10] | Message[10] 0x00 Perm_row_0[10] 0 Tag[10] +; N[11] AEAD_State[11] | Message[11] 0x00 Perm_row_0[11] <<< 0 Tag[11] +; N[12] AEAD_State[12] | Message[12] 0x00 Perm_row_1[ 0] 1 Tag[12] +; N[13] AEAD_State[13] | Message[13] 0x00 Perm_row_1[ 1] 1 Tag[13] +; N[14] AEAD_State[14] | Message[14] 0x00 Perm_row_1[ 2] 1 Tag[14] +; N[15] AEAD_State[15] | Message[15] 0x00 Perm_row_1[ 3] 1 Tag[15] +; K[ 0] AEAD_State[16] | Message[16] 0x00 Perm_row_1[ 4] 1 +; K[ 1] AEAD_State[17] | Message[17] 0x00 Perm_row_1[ 5] 1 +; K[ 2] 
AEAD_State[18] | Message[18] 0x00 Perm_row_1[ 6] 1 +; K[ 3] AEAD_State[19] | Message[19] 0x00 Perm_row_1[ 7] 1 +; K[ 4] AEAD_State[20] | Message[20] 0x00 Perm_row_1[ 8] 1 +; K[ 5] AEAD_State[21] | Message[21] 0x00 Perm_row_1[ 9] 1 +; K[ 6] AEAD_State[22] | Message[22] 0x00 Perm_row_1[10] 1 +; K[ 7] AEAD_State[23] | Message[23] 0x00 Perm_row_1[11] <<< 1 +; K[ 8] AEAD_State[24] | Perm_row_2[ 0] 8 +; K[ 9] AEAD_State[25] | Perm_row_2[ 1] 8 +; K[10] AEAD_State[26] | Perm_row_2[ 2] 8 +; K[11] AEAD_State[27] | Perm_row_2[ 3] 8 +; K[12] AEAD_State[28] | Perm_row_2[ 4] 8 +; K[13] AEAD_State[29] | Perm_row_2[ 5] 8 +; K[14] AEAD_State[30] | Perm_row_2[ 6] 8 +; K[15] AEAD_State[31] | Perm_row_2[ 7] 8 +; 0x00 AEAD_State[32] | Perm_row_2[ 8] 8 +; 0x00 AEAD_State[33] | Perm_row_2[ 9] 8 +; 0x00 AEAD_State[34] | Perm_row_2[10] 8 +; 0x00 AEAD_State[35] | Perm_row_2[11] <<< 8 +; 0x00 AEAD_State[36] | Perm_row_3[ 0] 55 +; 0x00 AEAD_State[37] | Perm_row_3[ 1] 55 +; 0x00 AEAD_State[38] | Perm_row_3[ 2] 55 +; 0x00 AEAD_State[39] | Perm_row_3[ 3] 55 +; 0x00 AEAD_State[40] | Perm_row_3[ 4] 55 +; 0x00 AEAD_State[41] | Perm_row_3[ 5] 55 +; 0x00 AEAD_State[42] | Perm_row_3[ 6] 55 +; 0x00 AEAD_State[43] | Perm_row_3[ 7] 55 +; 0x00 AEAD_State[44] | Perm_row_3[ 8] 55 +; 0x00 AEAD_State[45] | Perm_row_3[ 9] 55 +; 0x00 AEAD_State[46] | Perm_row_3[10] 55 +; 0x00 ^0x80 AEAD_State[47] | ^0x80 Perm_row_3[11] <<< 55 diff --git a/knot/Implementations/crypto_hash/knot512/avr8_speed/hash.c b/knot/Implementations/crypto_hash/knot512/avr8_speed/hash.c new file mode 100644 index 0000000..dbbe4df --- /dev/null +++ b/knot/Implementations/crypto_hash/knot512/avr8_speed/hash.c @@ -0,0 +1,32 @@ +#include +#include +#include +#include +#include "api.h" +#include "crypto_hash.h" + +extern void crypto_hash_asm( + unsigned char *out, + const unsigned char *in, + unsigned char inlen + ); + +int crypto_hash( + unsigned char *out, + const unsigned char *in, + unsigned long long inlen +) +{ + /* + ... + ... 
the code for the hash function implementation goes here + ... generating a hash value out[0],out[1],...,out[CRYPTO_BYTES-1] + ... from a message in[0],in[1],...,in[in-1] + ... + ... return 0; + */ + + crypto_hash_asm(out, in, inlen); + + return 0; +} \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot512/avr8_speed/knot256.h b/knot/Implementations/crypto_hash/knot512/avr8_speed/knot256.h new file mode 100644 index 0000000..f99f68b --- /dev/null +++ b/knot/Implementations/crypto_hash/knot512/avr8_speed/knot256.h @@ -0,0 +1,306 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.1 2020 by KNOT Team * +; ********************************************** +; +#define x10 r0 +#define x11 r1 +#define x12 r2 +#define x13 r3 +#define x14 r4 +#define x15 r5 +#define x16 r6 +#define x17 r7 + +; an intentionally arrangement of registers to facilitate movw +#define x20 r8 +#define x21 r10 +#define x22 r12 +#define x23 r14 +#define x24 r9 +#define x25 r11 +#define x26 r13 +#define x27 r15 + +; an intentionally arrangement of registers to facilitate movw +#define x30 r16 +#define x35 r18 +#define x32 r20 +#define x37 r22 +#define x34 r17 +#define x31 r19 +#define x36 r21 +#define x33 r23 + +#define t0j r24 +#define t1j r25 +#define x0j r27 + +#include "assist.h" + +.macro Sbox i0, i1, i2, i3 + mov t0j, \i1 + com \i0 + and \i1, \i0 + eor \i1, \i2 + or \i2, t0j + eor \i0, \i3 + eor \i2, \i0 + eor t0j, \i3 + and \i0, \i1 + eor \i3, \i1 + eor \i0, t0j + and t0j, \i2 + eor \i1, t0j +.endm + +Permutation: + PUSH_CONFLICT + mov rcnt, rn + + ldi YH, hi8(SRAM_STATE + ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + ROW_INBYTES) + ld x10, Y+ + ld x11, Y+ + ld x12, Y+ + ld x13, Y+ + ld x14, Y+ + ld x15, Y+ + ld x16, Y+ + ld x17, Y+ + ld x20, Y+ + ld x21, Y+ + ld x22, Y+ + ld x23, Y+ + ld 
x24, Y+ + ld x25, Y+ + ld x26, Y+ + ld x27, Y+ + ld x30, Y+ + ld x31, Y+ + ld x32, Y+ + ld x33, Y+ + ld x34, Y+ + ld x35, Y+ + ld x36, Y+ + ld x37, Y+ + +#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) + sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[2] = 1 for HASH + rjmp For_Hash +For_AEAD: + ldi ZL, lo8(RC_LFSR6) + ldi ZH, hi8(RC_LFSR6) + rjmp round_loop_start +For_Hash: + ldi ZL, lo8(RC_LFSR7) + ldi ZH, hi8(RC_LFSR7) +#elif defined(CRYPTO_AEAD) + ldi ZL, lo8(RC_LFSR6) + ldi ZH, hi8(RC_LFSR6) +#else + ldi ZL, lo8(RC_LFSR7) + ldi ZH, hi8(RC_LFSR7) +#endif + +round_loop_start: + ; AddRC + lpm t0j, Z+ + ldi YH, hi8(SRAM_STATE) + ldi YL, lo8(SRAM_STATE) + ld x0j, Y + eor x0j, t0j + + ; SubColumns + Sbox x0j, x10, x20, x30 + st Y+, x0j + ld x0j, Y + Sbox x0j, x11, x21, x31 + st Y+, x0j + ld x0j, Y + Sbox x0j, x12, x22, x32 + st Y+, x0j + ld x0j, Y + Sbox x0j, x13, x23, x33 + st Y+, x0j + ld x0j, Y + Sbox x0j, x14, x24, x34 + st Y+, x0j + ld x0j, Y + Sbox x0j, x15, x25, x35 + st Y+, x0j + ld x0j, Y + Sbox x0j, x16, x26, x36 + st Y+, x0j + ld x0j, Y + Sbox x0j, x17, x27, x37 + st Y, x0j + + ; ShiftRows + ; <<< 1 + mov t0j, x17 + rol t0j + rol x10 + rol x11 + rol x12 + rol x13 + rol x14 + rol x15 + rol x16 + rol x17 + + ; <<< 8 + ; 7 6 5 4 3 2 1 0 => 6 5 4 3 2 1 0 7 + ;mov t0j, x27 + ;mov x27, x26 + ;mov x26, x25 + ;mov x25, x24 + ;mov x24, x23 + ;mov x23, x22 + ;mov x22, x21 + ;mov x21, x20 + ;mov x20, t0j + ; an intentionally arrangement of registers to facilitate movw + movw t0j, x23 ; t1j:t0j <= x27:x23 + movw x23, x22 ; x27:x23 <= x26:x22 + movw x22, x21 ; x26:x22 <= x25:x21 + movw x21, x20 ; x25:x21 <= x24:x20 + mov x20, t1j ; x20 <= t1j + mov x24, t0j ; x24 <= t0j + + ; <<< 1 + mov t0j, x37 + rol t0j + rol x30 + rol x31 + rol x32 + rol x33 + rol x34 + rol x35 + rol x36 + rol x37 + ; <<< 24 + ; 7 6 5 4 3 2 1 0 => 4 3 2 1 0 7 6 5 + ;mov t0j, x30 + ;mov x30, x35 + ;mov x35, x32 + ;mov x32, x37 + ;mov x37, x34 + ;mov x34, x31 + ;mov x31, x36 + ;mov x36, x33 + ;mov 
x33, t0j + ; an intentionally arrangement of registers to facilitate movw + ;x30 r16 + ;x35 r18 + ;x32 r20 + ;x37 r22 + ;x34 r17 + ;x31 r19 + ;x36 r21 + ;x33 r23 + movw t0j, x30 ; t1j:t0j <= x34:x30 + movw x30, x35 ; x34:x30 <= x31:x35 + movw x35, x32 ; x31:x35 <= x36:x32 + movw x32, x37 ; x36:x32 <= x33:x37 + mov x37, t1j ; x37 <= x34 + mov x33, t0j ; x33 <= x30 + + dec rcnt + breq round_loop_end + jmp round_loop_start + +round_loop_end: + ldi YH, hi8(SRAM_STATE + ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + ROW_INBYTES) + st Y+, x10 + st Y+, x11 + st Y+, x12 + st Y+, x13 + st Y+, x14 + st Y+, x15 + st Y+, x16 + st Y+, x17 + st Y+, x20 + st Y+, x21 + st Y+, x22 + st Y+, x23 + st Y+, x24 + st Y+, x25 + st Y+, x26 + st Y+, x27 + st Y+, x30 + st Y+, x31 + st Y+, x32 + st Y+, x33 + st Y+, x34 + st Y+, x35 + st Y+, x36 + st Y+, x37 + + POP_CONFLICT +ret + + +.section .text +#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) +RC_LFSR6: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x21, 0x03, 0x06 +.byte 0x0c, 0x18, 0x31, 0x22, 0x05, 0x0a, 0x14, 0x29 +.byte 0x13, 0x27, 0x0f, 0x1e, 0x3d, 0x3a, 0x34, 0x28 +.byte 0x11, 0x23, 0x07, 0x0e, 0x1c, 0x39, 0x32, 0x24 +.byte 0x09, 0x12, 0x25, 0x0b, 0x16, 0x2d, 0x1b, 0x37 +.byte 0x2e, 0x1d, 0x3b, 0x36, 0x2c, 0x19, 0x33, 0x26 +.byte 0x0d, 0x1a, 0x35, 0x2a, 0x15, 0x2b, 0x17, 0x2f +.byte 0x1f, 0x3f, 0x3e, 0x3c, 0x38, 0x30, 0x20, 0x00 +RC_LFSR7: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03 +.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a +.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c +.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b +.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a +.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f +.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43 +.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09 +.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36 +.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37 +.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31 +.byte 0x63, 0x46, 0x0d, 
0x1a, 0x34, 0x69, 0x52, 0x25 +.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c +.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a +.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f +.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00 +#elif defined(CRYPTO_AEAD) +RC_LFSR6: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x21, 0x03, 0x06 +.byte 0x0c, 0x18, 0x31, 0x22, 0x05, 0x0a, 0x14, 0x29 +.byte 0x13, 0x27, 0x0f, 0x1e, 0x3d, 0x3a, 0x34, 0x28 +.byte 0x11, 0x23, 0x07, 0x0e, 0x1c, 0x39, 0x32, 0x24 +.byte 0x09, 0x12, 0x25, 0x0b, 0x16, 0x2d, 0x1b, 0x37 +.byte 0x2e, 0x1d, 0x3b, 0x36, 0x2c, 0x19, 0x33, 0x26 +.byte 0x0d, 0x1a, 0x35, 0x2a, 0x15, 0x2b, 0x17, 0x2f +.byte 0x1f, 0x3f, 0x3e, 0x3c, 0x38, 0x30, 0x20, 0x00 +#else +RC_LFSR7: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03 +.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a +.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c +.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b +.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a +.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f +.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43 +.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09 +.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36 +.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37 +.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31 +.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25 +.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c +.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a +.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f +.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00 +#endif \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot512/avr8_speed/knot384.h b/knot/Implementations/crypto_hash/knot512/avr8_speed/knot384.h new file mode 100644 index 0000000..0b3dd75 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot512/avr8_speed/knot384.h @@ -0,0 +1,261 @@ +; +; ********************************************** +; * KNOT: a family of 
bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.1 2020 by KNOT Team * +; ********************************************** +; + +; an intentionally arrangement of registers to facilitate movw +#define x20 r0 +#define x21 r2 +#define x22 r4 +#define x23 r6 +#define x24 r8 +#define x25 r10 +#define x26 r1 +#define x27 r3 +#define x28 r5 +#define x29 r7 +#define x2a r9 +#define x2b r11 + +; an intentionally arrangement of registers to facilitate movw +#define x30 r22 +#define x35 r20 +#define x3a r18 +#define x33 r16 +#define x38 r14 +#define x31 r12 +#define x36 r23 +#define x3b r21 +#define x34 r19 +#define x39 r17 +#define x32 r15 +#define x37 r13 + +#define t0j r24 +#define t1j r25 +#define x0j r25 +#define x1j r27 + +#include "assist.h" + +.macro Sbox i0, i1, i2, i3 + ldi t0j, 0xFF + eor \i0, t0j + mov t0j, \i1 + and \i1, \i0 + eor \i1, \i2 + or \i2, t0j + eor \i0, \i3 + eor \i2, \i0 + eor t0j, \i3 + and \i0, \i1 + eor \i3, \i1 + eor \i0, t0j + and t0j, \i2 + eor \i1, t0j +.endm + +.macro OneColumn i0, i1, i2, i3 + ld \i0, Y + ldd \i1, Y + ROW_INBYTES + Sbox \i0, \i1, \i2, \i3 + st Y+, \i0 + rol \i1 ; ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES -1, \i1 +.endm + +Permutation: + PUSH_CONFLICT + mov rcnt, rn + + ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES) + ld x20, Y+ + ld x21, Y+ + ld x22, Y+ + ld x23, Y+ + ld x24, Y+ + ld x25, Y+ + ld x26, Y+ + ld x27, Y+ + ld x28, Y+ + ld x29, Y+ + ld x2a, Y+ + ld x2b, Y+ + ld x30, Y+ + ld x31, Y+ + ld x32, Y+ + ld x33, Y+ + ld x34, Y+ + ld x35, Y+ + ld x36, Y+ + ld x37, Y+ + ld x38, Y+ + ld x39, Y+ + ld x3a, Y+ + ld x3b, Y+ + + ldi ZL, lo8(RC_LFSR7) + ldi ZH, hi8(RC_LFSR7) + +round_loop_start: + ; AddRC + lpm t0j, Z+ + ldi YH, hi8(SRAM_STATE) + ldi YL, lo8(SRAM_STATE) + ld x0j, Y + eor x0j, t0j + + ldd x1j, Y + ROW_INBYTES + Sbox x0j, x1j, x20, x30 + st Y+, x0j + lsl x1j ; 
ShiftRows -- Row 1 <<< 1 + std Y + ROW_INBYTES -1, x1j + + OneColumn x0j, x1j, x21, x31 + OneColumn x0j, x1j, x22, x32 + OneColumn x0j, x1j, x23, x33 + OneColumn x0j, x1j, x24, x34 + OneColumn x0j, x1j, x25, x35 + OneColumn x0j, x1j, x26, x36 + OneColumn x0j, x1j, x27, x37 + OneColumn x0j, x1j, x28, x38 + OneColumn x0j, x1j, x29, x39 + OneColumn x0j, x1j, x2a, x3a + OneColumn x0j, x1j, x2b, x3b + + ld x1j, Y + eor t0j, t0j + adc x1j, t0j + st Y, x1j + + ; b a 9 8 7 6 5 4 3 2 1 0 + ; -- -- -- -- -- -- -- -- -- -- -- x- 0 + ; -- -- -- -- -- -- -- -- -- -- -- x' 0 + ; -- -- -- -- -- -- -- -- -- -- x- -- 1 + ; -- -- -- -- x' -- -- -- -- -- -- -- 7 + ; 4 3 2 1 0 b a 9 8 7 6 5 + + ; ShiftRows -- the last two rows + ; <<< 8 + ; b a 9 8 7 6 5 4 3 2 1 0 => a 9 8 7 6 5 4 3 2 1 0 b + movw t0j, x25 ; t1j:t0j <= x2b:x25 + movw x25, x24 ; x2b:x25 <= x2a:x24 + movw x24, x23 ; x2a:x24 <= x29:x23 + movw x23, x22 ; x29:x23 <= x28:x22 + movw x22, x21 ; x28:x22 <= x27:x21 + movw x21, x20 ; x27:x21 <= x26:x20 + mov x26, t0j ; x26 <= x25 + mov x20, t1j ; x20 <= x2b + + ; >>> 1 + mov t0j, x3b + ror t0j + ror x3a + ror x39 + ror x38 + ror x37 + ror x36 + ror x35 + ror x34 + ror x33 + ror x32 + ror x31 + ror x30 + ror x3b + ; <<< 56 + ; b a 9 8 7 6 5 4 3 2 1 0 => 4 3 2 1 0 b a 9 8 7 6 5 + ; mov x3j, x30 + ; mov x30, x35 + ; mov x35, x3a + ; mov x3a, x33 + ; mov x33, x38 + ; mov x38, x31 + ; mov x31, x36 + ; mov x36, x3b + ; mov x3b, x34 + ; mov x34, x39 + ; mov x39, x32 + ; mov x32, x37 + ; mov x37, x3j + ; an intentionally arrangement of registers to facilitate movw + ; x30 r22 + ; x35 r20 + ; x3a r18 + ; x33 r16 + ; x38 r14 + ; x31 r12 + ; x36 r23 + ; x3b r21 + ; x34 r19 + ; x39 r17 + ; x32 r15 + ; x37 r13 + movw t0j, x30 ; t1j:t0j <= x36:x30 + movw x30, x35 ; x36:x30 <= x3b:x35 + movw x35, x3a ; x3b:x35 <= x34:x3a + movw x3a, x33 ; x34:x3a <= x39:x33 + movw x33, x38 ; x39:x33 <= x32:x38 + movw x38, x31 ; x32:x38 <= x37:x31 + mov x31, t1j ; x31 <= x36 + mov x37, t0j ; x37 <= x30 + + dec 
rcnt + breq round_loop_end + jmp round_loop_start + +round_loop_end: + + ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES) + st Y+, x20 + st Y+, x21 + st Y+, x22 + st Y+, x23 + st Y+, x24 + st Y+, x25 + st Y+, x26 + st Y+, x27 + st Y+, x28 + st Y+, x29 + st Y+, x2a + st Y+, x2b + st Y+, x30 + st Y+, x31 + st Y+, x32 + st Y+, x33 + st Y+, x34 + st Y+, x35 + st Y+, x36 + st Y+, x37 + st Y+, x38 + st Y+, x39 + st Y+, x3a + st Y+, x3b + + POP_CONFLICT +ret + +RC_LFSR7: +.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03 +.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a +.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c +.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b +.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a +.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f +.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43 +.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09 +.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36 +.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37 +.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31 +.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25 +.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c +.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a +.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f +.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00 \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot512/avr8_speed/knot512.h b/knot/Implementations/crypto_hash/knot512/avr8_speed/knot512.h new file mode 100644 index 0000000..b0e4319 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot512/avr8_speed/knot512.h @@ -0,0 +1,435 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.1 2020 by KNOT Team * +; ********************************************** +; 
; -------------------------------------------------------------------------
; KNOT-512 permutation core (STATE_INBITS == 512, ROW_INBYTES == 16).
;
; State layout: 4 rows of 16 bytes at SRAM_STATE.
;   Row 0 and row 1 stay in SRAM and are streamed through x0j/x1j.
;   Row 2: only the EVEN-indexed bytes are register-resident (below);
;          the odd bytes stay in SRAM and are cycled through x2j.
;   Row 3: all 16 bytes are register-resident.
; -------------------------------------------------------------------------

; Row 2, even bytes. NOTE(review): r0/r1 are avr-gcc's tmp/zero registers
; and are NOT saved by PUSH_CONFLICT (it only saves r16-r19, r23-r31);
; presumably the C-callable wrapper uses PUSH_ALL from assist.h to cover
; the low registers -- confirm against the caller.
#define x20 r0
#define x22 r2
#define x24 r4
#define x26 r6
#define x28 r1
#define x2a r3
#define x2c r5
#define x2e r7

; Row 3, all 16 bytes. The register numbers are deliberately scrambled so
; that the <<< 24 byte-rotation at the end of each round can be done with
; movw on register pairs (see the shuffle after the <<< 1 step below).
#define x30 r8
#define x3d r10
#define x3a r12
#define x37 r14
#define x34 r16
#define x31 r18
#define x3e r20
#define x3b r22
#define x38 r9
#define x35 r11
#define x32 r13
#define x3f r15
#define x3c r17
#define x39 r19
#define x36 r21
#define x33 r23

; Scratch / streaming registers.
; NOTE(review): x0j and t1j both map to r25. This looks deliberate -- t1j
; is only written in the movw shuffles after the per-column pass, when x0j
; is dead -- but confirm before touching either definition.
#define t0j r24
#define t1j r25
#define x0j r25
#define x1j r27
#define x2j r26

#include "assist.h"

; Bit-sliced KNOT S-box on one column: \i0..\i3 are the four row bits
; (one byte = 8 parallel columns). Clobbers t0j. The leading
; ldi/eor pair complements \i0 (ldi requires r16+; t0j = r24 is fine).
.macro Sbox i0, i1, i2, i3
    ldi t0j, 0xFF
    eor \i0, t0j
    mov t0j, \i1
    and \i1, \i0
    eor \i1, \i2
    or \i2, t0j
    eor \i0, \i3
    eor \i2, \i0
    eor t0j, \i3
    and \i0, \i1
    eor \i3, \i1
    eor \i0, t0j
    and t0j, \i2
    eor \i1, t0j
.endm

; Process columns 2i and 2i+1 (Y points at the row-0 byte of column 2i).
; \i2_e       register-resident even row-2 byte for column 2i
; \i3_e/\i3_o row-3 bytes for the even/odd column
; Row-1 "<<< 1" is carry-chained: each rol x1j shifts the carry produced
; by the previous column's rol in; the chain is opened by lsl in column 0
; and closed by the adc after column 15.
; The ldd/std pair at +2*ROW_INBYTES+1 stores the just-processed odd
; row-2 byte one position ahead (its share of the row-2 byte rotation --
; see diagram in Permutation) and fetches the next odd byte into x2j.
.macro TwoColumns i2_e, i3_e, i3_o
    ; column 2i
    ld  x0j, Y
    ldd x1j, Y + ROW_INBYTES
    Sbox x0j, x1j, \i2_e, \i3_e
    st  Y+, x0j
    rol x1j                        ; ShiftRows -- Row 1 <<< 1
    std Y + ROW_INBYTES - 1, x1j

    ; column 2i+1
    ld  x0j, Y
    ldd x1j, Y + ROW_INBYTES
    Sbox x0j, x1j, x2j, \i3_o
    st  Y+, x0j
    rol x1j                        ; ShiftRows -- Row 1 <<< 1
    std Y + ROW_INBYTES - 1, x1j
    ldd t0j, Y + 2 * ROW_INBYTES + 1
    std Y + 2 * ROW_INBYTES + 1, x2j
    mov x2j, t0j
.endm

; Permutation: run `rn` rounds of the KNOT-512 permutation on the state
; at SRAM_STATE. Round constants are read from flash via Z/lpm; the table
; (7-bit or 8-bit LFSR) is selected by build config and, when both AEAD
; and HASH are built, by bit 2 of AEDH at run time.
; The round counter rcnt (r26) is kept ON THE STACK during the round body
; because r26 doubles as x2j there.
Permutation:
    PUSH_CONFLICT
    mov  rcnt, rn
    push rcnt

    ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES)
    ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES)

    ; Load row 2 (even bytes only) and all of row 3 into registers.
    ldd x20, Y + 0x00
    ldd x22, Y + 0x02
    ldd x24, Y + 0x04
    ldd x26, Y + 0x06
    ldd x28, Y + 0x08
    ldd x2a, Y + 0x0a
    ldd x2c, Y + 0x0c
    ldd x2e, Y + 0x0e

    adiw YL, ROW_INBYTES

    ld x30, Y+
    ld x31, Y+
    ld x32, Y+
    ld x33, Y+
    ld x34, Y+
    ld x35, Y+
    ld x36, Y+
    ld x37, Y+
    ld x38, Y+
    ld x39, Y+
    ld x3a, Y+
    ld x3b, Y+
    ld x3c, Y+
    ld x3d, Y+
    ld x3e, Y+
    ld x3f, Y+

#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
    sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[2] = 1 for HASH
    rjmp For_Hash
For_AEAD:
    ldi ZL, lo8(RC_LFSR7)
    ldi ZH, hi8(RC_LFSR7)
    rjmp round_loop_start
For_Hash:
    ldi ZL, lo8(RC_LFSR8)
    ldi ZH, hi8(RC_LFSR8)
#elif defined(CRYPTO_AEAD)
    ldi ZL, lo8(RC_LFSR7)
    ldi ZH, hi8(RC_LFSR7)
#else
    ldi ZL, lo8(RC_LFSR8)
    ldi ZH, hi8(RC_LFSR8)
#endif

round_loop_start:
    ; AddRC: xor the next round constant into state byte 0 (row 0, col 0).
    lpm t0j, Z+
    ldi YH, hi8(SRAM_STATE)
    ldi YL, lo8(SRAM_STATE)

    ; column 0 -- lsl (not rol) opens the row-1 carry chain with a 0 bit;
    ; the wrapped-around MSB is re-inserted by the adc after column 15.
    ld  x0j, Y
    eor x0j, t0j
    ldd x1j, Y + ROW_INBYTES
    Sbox x0j, x1j, x20, x30
    st  Y+, x0j
    lsl x1j                        ; ShiftRows -- Row 1 <<< 1
    std Y + ROW_INBYTES - 1, x1j

    ; column 1 -- first odd column: prime the x2j pipeline from SRAM.
    ld  x0j, Y
    ldd x1j, Y + ROW_INBYTES
    ldd x2j, Y + 2 * ROW_INBYTES
    Sbox x0j, x1j, x2j, x31
    st  Y+, x0j
    rol x1j                        ; ShiftRows -- Row 1 <<< 1
    std Y + ROW_INBYTES - 1, x1j
    ldd t0j, Y + 2 * ROW_INBYTES + 1
    std Y + 2 * ROW_INBYTES + 1, x2j
    mov x2j, t0j

    ; column 2, 3
    TwoColumns x22, x32, x33
    ; column 4, 5
    TwoColumns x24, x34, x35
    ; column 6, 7
    TwoColumns x26, x36, x37
    ; column 8, 9
    TwoColumns x28, x38, x39
    ; column 10, 11
    TwoColumns x2a, x3a, x3b
    ; column 12, 13
    TwoColumns x2c, x3c, x3d

    ; column 14
    ld  x0j, Y
    ldd x1j, Y + ROW_INBYTES
    Sbox x0j, x1j, x2e, x3e
    st  Y+, x0j
    rol x1j                        ; ShiftRows -- Row 1 <<< 1
    std Y + ROW_INBYTES - 1, x1j

    ; column 15 -- last odd column; x2j still holds its row-2 byte.
    ld  x0j, Y
    ldd x1j, Y + ROW_INBYTES
    Sbox x0j, x1j, x2j, x3f
    st  Y+, x0j
    rol x1j                        ; ShiftRows -- Row 1 <<< 1
    std Y + ROW_INBYTES - 1, x1j

    ; Close the row-1 <<< 1 chain: add the final carry into row-1 byte 0,
    ; and flush the last pipelined odd row-2 byte.
    ld  x1j, Y
    eor t0j, t0j
    adc x1j, t0j
    st  Y, x1j
    std Y + ROW_INBYTES + 1, x2j

    ; ShiftRows -- Row 2 <<< 16 (= rotate left by 2 bytes).
    ; The even bytes are rotated here in registers; the odd bytes were
    ; already placed one even-slot ahead by the std in TwoColumns.
    ;  f  e  d  c  b  a  9  8  7  6  5  4  3  2  1  0
    ; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x-  0
    ; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x'  0
    ; -- -- -- -- -- -- -- -- -- -- -- -- -- x- -- --  2
    ; -- -- -- -- -- -- -- -- -- -- -- -- x' -- -- --  3
    ;  c  b  a  9  8  7  6  5  4  3  2  1  0  f  e  d
    ; x2e x2c x2a x28 x26 x24 x22 x20 => x2c x2a x28 x26 x24 x22 x20 x2e
    ;mov t0j, x2e
    ;mov x2e, x2c
    ;mov x2c, x2a
    ;mov x2a, x28
    ;mov x28, x26
    ;mov x26, x24
    ;mov x24, x22
    ;mov x22, x20
    ;mov x20, t0j
    ; an intentional arrangement of registers to facilitate movw
    movw t0j, x26 ; t1j:t0j <= x2e:x26
    movw x26, x24 ; x2e:x26 <= x2c:x24
    movw x24, x22 ; x2c:x24 <= x2a:x22
    movw x22, x20 ; x2a:x22 <= x28:x20
    mov  x20, t1j ; x20 <= t1j
    mov  x28, t0j ; x28 <= t0j

    ; ShiftRows -- Row 3 <<< 25, done as <<< 1 (bitwise) then <<< 24
    ; (byte-wise). First the <<< 1: extract the MSB of the row into carry,
    ; then rotate every byte left through carry.
    mov t0j, x3f
    rol t0j
    rol x30
    rol x31
    rol x32
    rol x33
    rol x34
    rol x35
    rol x36
    rol x37
    rol x38
    rol x39
    rol x3a
    rol x3b
    rol x3c
    rol x3d
    rol x3e
    rol x3f
    ; <<< 24 (rotate left by 3 bytes):
    ;  f  e  d  c  b  a  9  8  7  6  5  4  3  2  1  0 =>
    ;  c  b  a  9  8  7  6  5  4  3  2  1  0  f  e  d
    ; mov x3j, x30
    ; mov x30, x3d
    ; mov x3d, x3a
    ; mov x3a, x37
    ; mov x37, x34
    ; mov x34, x31
    ; mov x31, x3e
    ; mov x3e, x3b
    ; mov x3b, x38
    ; mov x38, x35
    ; mov x35, x32
    ; mov x32, x3f
    ; mov x3f, x3c
    ; mov x3c, x39
    ; mov x39, x36
    ; mov x36, x33
    ; mov x33, x3j
    ; an intentional arrangement of registers to facilitate movw:
    ; x30 r8
    ; x3d r10
    ; x3a r12
    ; x37 r14
    ; x34 r16
    ; x31 r18
    ; x3e r20
    ; x3b r22
    ; x38 r9
    ; x35 r11
    ; x32 r13
    ; x3f r15
    ; x3c r17
    ; x39 r19
    ; x36 r21
    ; x33 r23
    movw t0j, x30 ; t1j:t0j <= x38:x30
    movw x30, x3d ; x38:x30 <= x35:x3d
    movw x3d, x3a ; x35:x3d <= x32:x3a
    movw x3a, x37 ; x32:x3a <= x3f:x37
    movw x37, x34 ; x3f:x37 <= x3c:x34
    movw x34, x31 ; x3c:x34 <= x39:x31
    movw x31, x3e ; x39:x31 <= x36:x3e
    movw x3e, x3b ; x36:x3e <= x33:x3b
    mov  x3b, t1j ; x3b <= x38
    mov  x33, t0j ; x33 <= x30

    ; Round counter lives on the stack (r26 == x2j inside the round body).
    pop  rcnt
    dec  rcnt
    push rcnt
    breq round_loop_end
    rjmp round_loop_start

round_loop_end:
    pop rcnt

    ; Write the register-resident rows back to SRAM_STATE.
    ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES)
    ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES)
    std Y + 0x00, x20
    std Y + 0x02, x22
    std Y + 0x04, x24
    std Y + 0x06, x26
    std Y + 0x08, x28
    std Y + 0x0a, x2a
    std Y + 0x0c, x2c
    std Y + 0x0e, x2e
    adiw YL, ROW_INBYTES
    st Y+, x30
    st Y+, x31
    st Y+, x32
    st Y+, x33
    st Y+, x34
    st Y+, x35
    st Y+, x36
    st Y+, x37
    st Y+, x38
    st Y+, x39
    st Y+, x3a
    st Y+, x3b
    st Y+, x3c
    st Y+, x3d
    st Y+, x3e
    st Y+, x3f

    POP_CONFLICT
ret

; Round-constant tables in flash (read via lpm above).
; RC_LFSR7: 7-bit LFSR sequence (AEAD); RC_LFSR8: 8-bit (HASH).
.section .text
#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
RC_LFSR7:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03
.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a
.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c
.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b
.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a
.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f
.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43
.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09
.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36
.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37
.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31
.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25
.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c
.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a
.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f
.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00
RC_LFSR8:
.byte 0x01, 0x02, 0x04, 0x08, 0x11, 0x23, 0x47, 0x8e
.byte 0x1c, 0x38, 0x71, 0xe2, 0xc4, 0x89, 0x12, 0x25
.byte 0x4b, 0x97, 0x2e, 0x5c, 0xb8, 0x70, 0xe0, 0xc0
.byte 0x81, 0x03, 0x06, 0x0c, 0x19, 0x32, 0x64, 0xc9
.byte 0x92, 0x24, 0x49, 0x93, 0x26, 0x4d, 0x9b, 0x37
.byte 0x6e, 0xdc, 0xb9, 0x72, 0xe4, 0xc8, 0x90, 0x20
.byte 0x41, 0x82, 0x05, 0x0a, 0x15, 0x2b, 0x56, 0xad
.byte 0x5b, 0xb6, 0x6d, 0xda, 0xb5, 0x6b, 0xd6, 0xac
.byte 0x59, 0xb2, 0x65, 0xcb, 0x96, 0x2c, 0x58, 0xb0
.byte 0x61, 0xc3, 0x87, 0x0f, 0x1f, 0x3e, 0x7d, 0xfb
.byte 0xf6, 0xed, 0xdb, 0xb7, 0x6f, 0xde, 0xbd, 0x7a
.byte 0xf5, 0xeb, 0xd7, 0xae, 0x5d, 0xba, 0x74, 0xe8
.byte 0xd1, 0xa2, 0x44, 0x88, 0x10, 0x21, 0x43, 0x86
.byte 0x0d, 0x1b, 0x36, 0x6c, 0xd8, 0xb1, 0x63, 0xc7
.byte 0x8f, 0x1e, 0x3c, 0x79, 0xf3, 0xe7, 0xce, 0x9c
.byte 0x39, 0x73, 0xe6, 0xcc, 0x98, 0x31, 0x62, 0xc5
.byte 0x8b, 0x16, 0x2d, 0x5a, 0xb4, 0x69, 0xd2, 0xa4
.byte 0x48, 0x91, 0x22, 0x45, 0x8a, 0x14, 0x29, 0x52
.byte 0xa5, 0x4a, 0x95, 0x2a, 0x54, 0xa9, 0x53, 0xa7
.byte 0x4e, 0x9d, 0x3b, 0x77, 0xee, 0xdd, 0xbb, 0x76
.byte 0xec, 0xd9, 0xb3, 0x67, 0xcf, 0x9e, 0x3d, 0x7b
.byte 0xf7, 0xef, 0xdf, 0xbf, 0x7e, 0xfd, 0xfa, 0xf4
.byte 0xe9, 0xd3, 0xa6, 0x4c, 0x99, 0x33, 0x66, 0xcd
.byte 0x9a, 0x35, 0x6a, 0xd4, 0xa8, 0x51, 0xa3, 0x46
.byte 0x8c, 0x18, 0x30, 0x60, 0xc1, 0x83, 0x07, 0x0e
.byte 0x1d, 0x3a, 0x75, 0xea, 0xd5, 0xaa, 0x55, 0xab
.byte 0x57, 0xaf, 0x5f, 0xbe, 0x7c, 0xf9, 0xf2, 0xe5
.byte 0xca, 0x94, 0x28, 0x50, 0xa1, 0x42, 0x84, 0x09
.byte 0x13, 0x27, 0x4f, 0x9f, 0x3f, 0x7f, 0xff, 0xfe
.byte 0xfc, 0xf8, 0xf0, 0xe1, 0xc2, 0x85, 0x0b, 0x17
.byte 0x2f, 0x5e, 0xbc, 0x78, 0xf1, 0xe3, 0xc6, 0x8d
.byte 0x1a, 0x34, 0x68, 0xd0, 0xa0, 0x40, 0x80, 0x00
#elif defined(CRYPTO_AEAD)
RC_LFSR7:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03
.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a
.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c
.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b
.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a
.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f
.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43
.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09
.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36
.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37
.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31
.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25
.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c
.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a
.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f
.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00
#else
RC_LFSR8:
.byte 0x01, 0x02, 0x04, 0x08, 0x11, 0x23, 0x47, 0x8e
.byte 0x1c, 0x38, 0x71, 0xe2, 0xc4, 0x89, 0x12, 0x25
.byte 0x4b, 0x97, 0x2e, 0x5c, 0xb8, 0x70, 0xe0, 0xc0
.byte 0x81, 0x03, 0x06, 0x0c, 0x19, 0x32, 0x64, 0xc9
.byte 0x92, 0x24, 0x49, 0x93, 0x26, 0x4d, 0x9b, 0x37
.byte 0x6e, 0xdc, 0xb9, 0x72, 0xe4, 0xc8, 0x90, 0x20
.byte 0x41, 0x82, 0x05, 0x0a, 0x15, 0x2b, 0x56, 0xad
.byte 0x5b, 0xb6, 0x6d, 0xda, 0xb5, 0x6b, 0xd6, 0xac
.byte 0x59, 0xb2, 0x65, 0xcb, 0x96, 0x2c, 0x58, 0xb0
.byte 0x61, 0xc3, 0x87, 0x0f, 0x1f, 0x3e, 0x7d, 0xfb
.byte 0xf6, 0xed, 0xdb, 0xb7, 0x6f, 0xde, 0xbd, 0x7a
.byte 0xf5, 0xeb, 0xd7, 0xae, 0x5d, 0xba, 0x74, 0xe8
.byte 0xd1, 0xa2, 0x44, 0x88, 0x10, 0x21, 0x43, 0x86
.byte 0x0d, 0x1b, 0x36, 0x6c, 0xd8, 0xb1, 0x63, 0xc7
.byte 0x8f, 0x1e, 0x3c, 0x79, 0xf3, 0xe7, 0xce, 0x9c
.byte 0x39, 0x73, 0xe6, 0xcc, 0x98, 0x31, 0x62, 0xc5
.byte 0x8b, 0x16, 0x2d, 0x5a, 0xb4, 0x69, 0xd2, 0xa4
.byte 0x48, 0x91, 0x22, 0x45, 0x8a, 0x14, 0x29, 0x52
.byte 0xa5, 0x4a, 0x95, 0x2a, 0x54, 0xa9, 0x53, 0xa7
.byte 0x4e, 0x9d, 0x3b, 0x77, 0xee, 0xdd, 0xbb, 0x76
.byte 0xec, 0xd9, 0xb3, 0x67, 0xcf, 0x9e, 0x3d, 0x7b
.byte 0xf7, 0xef, 0xdf, 0xbf, 0x7e, 0xfd, 0xfa, 0xf4
.byte 0xe9, 0xd3, 0xa6, 0x4c, 0x99, 0x33, 0x66, 0xcd
.byte 0x9a, 0x35, 0x6a, 0xd4, 0xa8, 0x51, 0xa3, 0x46
.byte 0x8c, 0x18, 0x30, 0x60, 0xc1, 0x83, 0x07, 0x0e
.byte 0x1d, 0x3a, 0x75, 0xea, 0xd5, 0xaa, 0x55, 0xab
.byte 0x57, 0xaf, 0x5f, 0xbe, 0x7c, 0xf9, 0xf2, 0xe5
.byte 0xca, 0x94, 0x28, 0x50, 0xa1, 0x42, 0x84, 0x09
.byte 0x13, 0x27, 0x4f, 0x9f, 0x3f, 0x7f, 0xff, 0xfe
.byte 0xfc, 0xf8, 0xf0, 0xe1, 0xc2, 0x85, 0x0b, 0x17
.byte 0x2f, 0x5e, 0xbc, 0x78, 0xf1, 0xe3, 0xc6, 0x8d
.byte 0x1a, 0x34, 0x68, 0xd0, 0xa0, 0x40, 0x80, 0x00
#endif
;
; **********************************************
; * KNOT: a family of bit-slice lightweight    *
; * authenticated encryption algorithms        *
; * and hash functions                         *
; *                                            *
; * Assembly implementation for 8-bit AVR CPU  *
; * Version 1.1 2020 by KNOT Team              *
; **********************************************
;

;
;
============================================
;  R E G I S T E R   D E F I N I T I O N S
; ============================================
;
; Shared register names used by the knot*.h permutation cores and the
; AEAD/HASH driver code.

#define mclen  r16   ; message/ciphertext length
#define radlen r17   ; associated-data length
#define tcnt   r17   ; NOTE(review): aliases radlen -- presumably the two
                     ; are never live at the same time; confirm in callers
#define tmp0   r20
#define tmp1   r21
#define cnt0   r22
#define rn     r23   ; round count handed to Permutation (mov rcnt, rn)


; Mode selector, tested with sbrc/sbrs (e.g. sbrc AEDH, 2 in Permutation):
; AEDH = 0b000: for authenticate AD
; AEDH = 0b001: for encryption
; AEDH = 0b011: for decryption
; AEDH = 0b100: for hash
#define AEDH r25     ; NOTE(review): r25 is also t1j/x0j inside knot512.h;
                     ; AEDH is only read before the round loop -- confirm
#define rcnt r26     ; round counter; in knot512.h r26 doubles as x2j, so
                     ; Permutation spills rcnt to the stack during rounds

; Select the permutation core matching the configured state width
; (STATE_INBITS comes from config.h).
#if (STATE_INBITS==256)
#include "knot256.h"
#elif (STATE_INBITS==384)
#include "knot384.h"
#elif (STATE_INBITS==512)
#include "knot512.h"
#else
#error "Not specified key size and state size"
#endif
