diff --git a/knot/Implementations/crypto_aead/knot128v1/avr8_lowrom/api.h b/knot/Implementations/crypto_aead/knot128v1/avr8_lowrom/api.h
new file mode 100644
index 0000000..51fc844
--- /dev/null
+++ b/knot/Implementations/crypto_aead/knot128v1/avr8_lowrom/api.h
@@ -0,0 +1,5 @@
+#define CRYPTO_KEYBYTES 16
+#define CRYPTO_NSECBYTES 0
+#define CRYPTO_NPUBBYTES 16
+#define CRYPTO_ABYTES 16
+#define CRYPTO_NOOVERLAP 1
\ No newline at end of file
diff --git a/knot/Implementations/crypto_aead/knot128v1/avr8_lowrom/assist.h b/knot/Implementations/crypto_aead/knot128v1/avr8_lowrom/assist.h
new file mode 100644
index 0000000..cb903a5
--- /dev/null
+++ b/knot/Implementations/crypto_aead/knot128v1/avr8_lowrom/assist.h
@@ -0,0 +1,140 @@
+;
+; **********************************************
+; * KNOT: a family of bit-slice lightweight *
+; * authenticated encryption algorithms *
+; * and hash functions *
+; * *
+; * Assembly implementation for 8-bit AVR CPU *
+; * Version 1.0 2020 by KNOT Team *
+; **********************************************
+; Helper macros: round-constant LFSR steps, the S-box, and register save/restore.
+.macro LFSR6_MACRO
+    bst rc, 5               ; T = rc[5]
+    bld tmp0, 0             ; tmp0[0] = rc[5]
+    bst rc, 4               ; T = rc[4]
+    bld tmp1, 0             ; tmp1[0] = rc[4]
+    eor tmp0, tmp1          ; tmp0[0] = rc[5] ^ rc[4], the feedback bit
+    ror tmp0                ; move the feedback bit into carry
+    rol rc                  ; rc = (rc << 1) | feedback
+    andi rc, 0x3F           ; keep the low 6 bits
+.endm
+
+.macro LFSR7_MACRO
+    bst rc, 6               ; same scheme as LFSR6_MACRO with taps rc[6] and rc[5]
+    bld tmp0, 0
+    bst rc, 5
+    bld tmp1, 0
+    eor tmp0, tmp1
+    ror tmp0
+    rol rc
+    andi rc, 0x7F           ; keep the low 7 bits
+.endm
+
+.macro LFSR8_MACRO
+    bst rc, 7               ; feedback = rc[7] ^ rc[5] ^ rc[4] ^ rc[3]
+    bld tmp0, 0
+    bst rc, 5
+    bld tmp1, 0
+    eor tmp0, tmp1
+    bst rc, 4
+    bld tmp1, 0
+    eor tmp0, tmp1
+    bst rc, 3
+    bld tmp1, 0
+    eor tmp0, tmp1
+    ror tmp0                ; feedback bit into carry
+    rol rc                  ; full 8-bit register, so no mask is applied
+.endm
+
+.macro Sbox i0, i1, i2, i3
+    mov tmp0, \i1           ; tmp0 is scratch; all four operands are updated in place
+    com \i0
+    and \i1, \i0
+    eor \i1, \i2
+    or \i2, tmp0
+    eor \i0, \i3
+    eor \i2, \i0
+    eor tmp0, \i3
+    and \i0, \i1
+    eor \i3, \i1
+    eor \i0, tmp0
+    and tmp0, \i2
+    eor \i1, tmp0
+.endm
+
+.macro PUSH_CONFLICT
+    push r16                ; saved on entry to Permutation and restored by POP_CONFLICT on exit
+    push r17
+    push r18
+    push r19
+
+    push r23
+    push r24
+
+    push r26
+    push r27
+    push r28
+    push r29
+    push r30
+    push r31
+.endm
+
+.macro POP_CONFLICT
+    pop r31                 ; exact reverse order of PUSH_CONFLICT
+    pop r30
+    pop r29
+    pop r28
+    pop r27
+    pop r26
+
+    pop r24
+    pop r23
+
+    pop r19
+    pop r18
+    pop r17
+    pop r16
+.endm
+
+.macro PUSH_ALL
+    push r2                 ; saves r2-r17, r28 and r29 at the entry of the exported routines
+    push r3
+    push r4
+    push r5
+    push r6
+    push r7
+    push r8
+    push r9
+    push r10
+    push r11
+    push r12
+    push r13
+    push r14
+    push r15
+    push r16
+    push r17
+    push r28
+    push r29
+.endm
+
+.macro POP_ALL
+    pop r29                 ; exact reverse order of PUSH_ALL
+    pop r28
+    pop r17
+    pop r16
+    pop r15
+    pop r14
+    pop r13
+    pop r12
+    pop r11
+    pop r10
+    pop r9
+    pop r8
+    pop r7
+    pop r6
+    pop r5
+    pop r4
+    pop r3
+    pop r2
+    clr r1                  ; presumably re-establishes the avr-gcc zero register before returning to C - TODO confirm
+.endm
\ No newline at end of file
diff --git a/knot/Implementations/crypto_aead/knot128v1/avr8_lowrom/config.h b/knot/Implementations/crypto_aead/knot128v1/avr8_lowrom/config.h
new file mode 100644
index 0000000..41d8080
--- /dev/null
+++ b/knot/Implementations/crypto_aead/knot128v1/avr8_lowrom/config.h
@@ -0,0 +1,131 @@
+#ifndef __CONFIG_H__
+#define __CONFIG_H__
+
+#define CRYPTO_AEAD
+//#define CRYPTO_HASH
+
+#define MAX_MESSAGE_LENGTH 128
+
+#define STATE_INBITS 256 /* selects the C1..C3 and rate/round tables below */
+/* For CRYPTO_AEAD */
+#define CRYPTO_KEYBITS 128
+/* For CRYPTO_HASH */
+#define CRYPTO_BITS 256
+
+#define STATE_INBYTES ((STATE_INBITS + 7) / 8)
+#define ROW_INBITS ((STATE_INBITS + 3) / 4) /* the state is handled as four rows */
+#define ROW_INBYTES ((ROW_INBITS + 7) / 8)
+
+/* For CRYPTO_AEAD */
+#define CRYPTO_KEYBYTES ((CRYPTO_KEYBITS + 7) / 8)
+#define CRYPTO_NSECBYTES 0
+#define CRYPTO_NPUBBYTES CRYPTO_KEYBYTES /* nonce length equals the key length */
+#define CRYPTO_ABYTES CRYPTO_KEYBYTES /* tag length equals the key length */
+#define CRYPTO_NOOVERLAP 1
+
+#define MAX_ASSOCIATED_DATA_LENGTH 32
+#define MAX_CIPHER_LENGTH (MAX_MESSAGE_LENGTH + CRYPTO_ABYTES)
+
+#define TAG_MATCH 0
+#define TAG_UNMATCH -1
+#define OTHER_FAILURES -2
+
+/* For CRYPTO_HASH */
+#define CRYPTO_BYTES ((CRYPTO_BITS + 7) / 8)
+
+
+
+#define DOMAIN_BITS 0x80 /* XORed into the last state byte by AddDomain */
+#define PAD_BITS 0x01 /* XORed into the state right after the last input byte */
+#define S384_R192_BITS 0x80 /* stored in the last state byte for the 384-bit state, 192-bit rate variant */
+
+#if (STATE_INBITS==256)
+#define C1 1
+#define C2 8
+#define C3 25
+#elif (STATE_INBITS==384)
+#define C1 1
+#define C2 8
+#define C3 55
+#elif (STATE_INBITS==512)
+#define C1 1
+#define C2 16
+#define C3 25
+#else
+#error "Not specified state size"
+#endif
+
+#ifdef CRYPTO_AEAD
+/* For CRYPTO_AEAD */
+#define KEY_INBITS (CRYPTO_KEYBYTES * 8)
+#define KEY_INBYTES (CRYPTO_KEYBYTES)
+
+#define NONCE_INBITS (CRYPTO_NPUBBYTES * 8)
+#define NONCE_INBYTES (CRYPTO_NPUBBYTES)
+
+#define TAG_INBITS (CRYPTO_ABYTES * 8)
+#define TAG_INBYTES (CRYPTO_ABYTES)
+
+#if (KEY_INBITS==128) && (STATE_INBITS==256)
+#define RATE_INBITS 64
+#define NR_0 52 /* rounds used by Initialization */
+#define NR_i 28 /* rounds used while absorbing AD and message blocks */
+#define NR_f 32 /* rounds used by Finalization */
+#elif (KEY_INBITS==128) && (STATE_INBITS==384)
+#define RATE_INBITS 192
+#define NR_0 76
+#define NR_i 28
+#define NR_f 32
+#elif (KEY_INBITS==192) && (STATE_INBITS==384)
+#define RATE_INBITS 96
+#define NR_0 76
+#define NR_i 40
+#define NR_f 44
+#elif (KEY_INBITS==256) && (STATE_INBITS==512)
+#define RATE_INBITS 128
+#define NR_0 100
+#define NR_i 52
+#define NR_f 56
+#else
+#error "Not specified key size and state size"
+#endif
+
+#define RATE_INBYTES ((RATE_INBITS + 7) / 8)
+#define SQUEEZE_RATE_INBYTES TAG_INBYTES /* Finalization extracts this many state bytes as the tag */
+
+#endif
+
+#ifdef CRYPTO_HASH
+/* For CRYPTO_HASH */
+#define HASH_DIGEST_INBITS (CRYPTO_BYTES * 8)
+
+#if (HASH_DIGEST_INBITS==256) && (STATE_INBITS==256)
+#define HASH_RATE_INBITS 32
+#define HASH_SQUEEZE_RATE_INBITS 128
+#define NR_h 68 /* rounds used by the hash permutation calls */
+#elif (HASH_DIGEST_INBITS==256) && (STATE_INBITS==384)
+#define HASH_RATE_INBITS 128
+#define HASH_SQUEEZE_RATE_INBITS 128
+#define NR_h 80
+#elif (HASH_DIGEST_INBITS==384) && (STATE_INBITS==384)
+#define HASH_RATE_INBITS 48
+#define HASH_SQUEEZE_RATE_INBITS 192
+#define NR_h 104
+#elif (HASH_DIGEST_INBITS==512) && (STATE_INBITS==512)
+#define HASH_RATE_INBITS 64
+#define HASH_SQUEEZE_RATE_INBITS 256
+#define NR_h 140
+#else
+#error "Not specified hash digest size and state size"
+#endif
+
+#define HASH_RATE_INBYTES ((HASH_RATE_INBITS + 7) / 8)
+#define HASH_SQUEEZE_RATE_INBYTES ((HASH_SQUEEZE_RATE_INBITS + 7) / 8)
+
+#endif
+
+#define TAG_MATCH 0 /* these three repeat the identical definitions earlier in this header */
+#define TAG_UNMATCH -1
+#define OTHER_FAILURES -2
+
+#endif
\ No newline at end of file
diff --git
a/knot/Implementations/crypto_aead/knot128v1/avr8_lowrom/crypto_aead.h b/knot/Implementations/crypto_aead/knot128v1/avr8_lowrom/crypto_aead.h
new file mode 100644
index 0000000..cd820d3
--- /dev/null
+++ b/knot/Implementations/crypto_aead/knot128v1/avr8_lowrom/crypto_aead.h
@@ -0,0 +1,26 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int crypto_aead_encrypt(
+    unsigned char *c,unsigned long long *clen,
+    const unsigned char *m,unsigned long long mlen,
+    const unsigned char *ad,unsigned long long adlen,
+    const unsigned char *nsec,
+    const unsigned char *npub,
+    const unsigned char *k
+    );
+
+
+int crypto_aead_decrypt(
+    unsigned char *m,unsigned long long *outputmlen,
+    unsigned char *nsec,
+    const unsigned char *c,unsigned long long clen,
+    const unsigned char *ad,unsigned long long adlen,
+    const unsigned char *npub,
+    const unsigned char *k
+    );
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/knot/Implementations/crypto_aead/knot128v1/avr8_lowrom/encrypt.c b/knot/Implementations/crypto_aead/knot128v1/avr8_lowrom/encrypt.c
new file mode 100644
index 0000000..baf0a3b
--- /dev/null
+++ b/knot/Implementations/crypto_aead/knot128v1/avr8_lowrom/encrypt.c
@@ -0,0 +1,113 @@
+#include
+#include
+#include
+#include
+#include "config.h"
+
+/* The assembly core takes every length as a single byte. */
+#define ASM_MAX_LENGTH 0xFFu
+
+extern void crypto_aead_encrypt_asm(
+    unsigned char *c,
+    const unsigned char *m,
+    unsigned char mlen,
+    const unsigned char *ad,
+    unsigned char adlen,
+    const unsigned char *npub,
+    const unsigned char *k
+    );
+
+extern int crypto_aead_decrypt_asm(
+    unsigned char *m,
+    const unsigned char *c,
+    unsigned char clen,
+    const unsigned char *ad,
+    unsigned char adlen,
+    const unsigned char *npub,
+    const unsigned char *k
+    );
+
+extern void crypto_hash_asm(
+    unsigned char *out,
+    const unsigned char *in,
+    unsigned char inlen
+    );
+
+
+/*
+ * Encrypt m[0..mlen-1] and authenticate it together with ad[0..adlen-1]
+ * under nonce npub and key k. The ciphertext followed by the
+ * TAG_INBYTES-byte tag is written to c and *clen is set to
+ * mlen + TAG_INBYTES. nsec is not used.
+ *
+ * Returns 0 on success. Returns OTHER_FAILURES, writing nothing, when
+ * mlen or adlen is larger than ASM_MAX_LENGTH.
+ */
+int crypto_aead_encrypt(
+    unsigned char *c, unsigned long long *clen,
+    const unsigned char *m, unsigned long long mlen,
+    const unsigned char *ad, unsigned long long adlen,
+    const unsigned char *nsec,
+    const unsigned char *npub,
+    const unsigned char *k
+    )
+{
+    (void)nsec;
+
+    /* Passing a longer length would silently keep only its low byte. */
+    if (mlen > ASM_MAX_LENGTH || adlen > ASM_MAX_LENGTH) {
+        return OTHER_FAILURES;
+    }
+
+    crypto_aead_encrypt_asm(c, m, (unsigned char)mlen, ad, (unsigned char)adlen, npub, k);
+
+    *clen = mlen + TAG_INBYTES;
+    return 0;
+}
+
+
+
+/*
+ * Verify and decrypt c[0..clen-1], which holds the ciphertext followed by
+ * the CRYPTO_ABYTES-byte tag, together with ad[0..adlen-1] under nonce
+ * npub and key k. On success the plaintext is in m and *mlen is set to
+ * clen - CRYPTO_ABYTES. nsec is not used.
+ *
+ * Returns 0 on success, -1 when clen is shorter than the tag or the tag
+ * does not match (m is zeroed in the latter case), and OTHER_FAILURES,
+ * writing nothing, when the message or adlen exceeds ASM_MAX_LENGTH.
+ */
+int crypto_aead_decrypt(
+    unsigned char *m, unsigned long long *mlen,
+    unsigned char *nsec,
+    const unsigned char *c, unsigned long long clen,
+    const unsigned char *ad, unsigned long long adlen,
+    const unsigned char *npub,
+    const unsigned char *k
+    )
+{
+    unsigned long long mlen_;
+    unsigned char tag_is_match;
+
+    (void)nsec;
+    if (clen < CRYPTO_ABYTES) {
+        return -1;
+    }
+    mlen_ = clen - CRYPTO_ABYTES;
+
+    /* Passing a longer length would silently keep only its low byte. */
+    if (mlen_ > ASM_MAX_LENGTH || adlen > ASM_MAX_LENGTH) {
+        return OTHER_FAILURES;
+    }
+
+    tag_is_match = crypto_aead_decrypt_asm(m, c, (unsigned char)mlen_, ad, (unsigned char)adlen, npub, k);
+
+    if (tag_is_match != 0)
+    {
+        memset(m, 0, (size_t)mlen_);
+        return -1;
+    }
+
+    *mlen = mlen_;
+    return 0;
+}
\ No newline at end of file
diff --git a/knot/Implementations/crypto_aead/knot128v1/avr8_lowrom/encrypt_core.S b/knot/Implementations/crypto_aead/knot128v1/avr8_lowrom/encrypt_core.S
new file mode 100644
index 0000000..cb7aed5
--- /dev/null
+++ b/knot/Implementations/crypto_aead/knot128v1/avr8_lowrom/encrypt_core.S
@@ -0,0 +1,537 @@
+;
+; **********************************************
+; * KNOT: a family of bit-slice lightweight *
+; * authenticated encryption algorithms *
+; * and hash functions *
+; * *
+; * Assembly implementation for 8-bit AVR CPU *
+; * Version 1.0 2020 by KNOT Team *
+; **********************************************
+;
+
+;
+; ============================================
+; S R A M D E F I N I T I O N S
+; ============================================
+;
+#include
+#include "config.h"
+
+.section .noinit
+    SRAM_STATE: .BYTE 0, 0, 0, 0, 0, 0, 0, 0
+    .BYTE 0, 0, 0, 0, 0, 0, 0, 0
+    .BYTE 0, 0, 0, 0, 0, 0, 0, 0
+    .BYTE 0, 0, 0, 0, 0, 0, 0, 0
+#if (STATE_INBYTES > 32)
+    .BYTE 0, 0, 0, 0, 0, 0, 0, 0
+    .BYTE 0, 0, 0, 0, 0, 0, 0, 0
+#endif
+#if (STATE_INBYTES > 48)
+    .BYTE 0, 0, 0, 0, 0, 0, 0, 0
+    .BYTE 0, 0, 0, 0, 0, 0, 0, 0
+#endif
+    SRAM_MESSAGE_OUT_ADDR: .BYTE 0, 0
+    SRAM_MESSAGE_IN_ADDR: .BYTE 0, 0
+    SRAM_MESSAGE_IN_LEN: .BYTE 0, 0
+#ifdef CRYPTO_AEAD
+; For CRYPTO_AEAD
+    SRAM_ASSOCIATED_DATA_ADDR: .BYTE 0, 0
+    SRAM_ADLEN: .BYTE 0, 0
+    SRAM_NONCE_ADDR: .BYTE 0, 0
+    SRAM_KEY_ADDR: .BYTE 0, 0
+
+    SRAM_ADDITIONAL:
+    .BYTE 0, 0, 0, 0, 0, 0, 0, 0
+    .BYTE 0, 0, 0, 0, 0, 0, 0, 0
+#if (CRYPTO_ABYTES > 16)
+    .BYTE 0, 0, 0, 0, 0, 0, 0, 0
+#endif
+#if (CRYPTO_ABYTES > 24)
+    .BYTE 0, 0, 0, 0, 0, 0, 0, 0
+#endif
+
+#endif
+
+.section .text
+
+#include "permutation.h"
+
+; XOR one full rate-sized block at YH:YL into the start of the state.
+; For enc and dec, the resulting ciphertext or plaintext is also stored through ZH:ZL.
+; require ZH:ZL be the address of the current cipher/message block
+XOR_to_State:
+    ldi XH, hi8(SRAM_STATE)
+    ldi XL, lo8(SRAM_STATE)
+    mov cnt0, rate                  ; byte counter: exactly one rate-sized block
+XOR_to_State_loop:
+    ld tmp0, Y+ ; plaintext/ciphertext
+    ld tmp1, X ; state
+    eor tmp1, tmp0 ; ciphertext/plaintext
+    sbrc AEDH, 0 ; test auth or enc/dec, if AEDH[0] == 0, skip store result
+    st Z+, tmp1 ; store ciphertext/plaintext
+    sbrc AEDH, 1 ; test auth/enc or dec, if AEDH[1] == 0, skip replacing the state byte
+    mov tmp1, tmp0 ; if dec, replace state
+    st X+, tmp1 ; store state byte
+    dec cnt0
+    brne XOR_to_State_loop
+; YH:YL now hold the address of the next input block
+ret
+
+; require YH:YL to point to the input data
+; require ZH:ZL to point to the output data
+; require cnt0 to contain the number of bytes in the source data
+; require the number of bytes in the source data to be less than rate, i.e., 0 <= cnt0 < rate
+;
+; bit 0 of AEDH is used to distinguish (auth AD) or (enc/dec M/C):
+; AEDH[0] = 0 for (auth AD), AEDH[0] = 1 for (enc/dec M/C)
+; bit 1 of AEDH is used to distinguish (auth AD/enc M) or (dec C):
+; AEDH[1] = 0 for (auth AD/enc M), AEDH[1] = 1 for (dec C)
+; AEDH = 0b000 for (auth AD)
+; AEDH = 0b001 for (enc M)
+; AEDH = 0b011 for (dec C)
+Pad_XOR_to_State:
+    ldi XH, hi8(SRAM_STATE)
+    ldi XL, lo8(SRAM_STATE)
+    tst cnt0
+    breq XOR_padded_data            ; empty partial block: only the padding byte is applied
+XOR_source_data_loop:
+    ld tmp0, Y+ ; plaintext/ciphertext
+    ld tmp1, X ; state
+    eor tmp1, tmp0 ; ciphertext/plaintext
+    sbrc AEDH, 0 ; test auth or enc/dec, if AEDH[0] == 0, skip store result
+    st Z+, tmp1 ; store ciphertext/plaintext
+    sbrc AEDH, 1 ; test auth/enc or dec, if AEDH[1] == 0, skip replacing the state byte
+    mov tmp1, tmp0 ; if dec, replace state
+    st X+, tmp1 ; store state byte
+    dec cnt0
+    brne XOR_source_data_loop
+XOR_padded_data:
+    ldi tmp0, PAD_BITS              ; X points just past the last absorbed byte
+    ld tmp1, X
+    eor tmp1, tmp0
+    st X, tmp1
+ret
+
+AddDomain:
+    ldi XH, hi8(SRAM_STATE + STATE_INBYTES - 1)
+    ldi XL, lo8(SRAM_STATE + STATE_INBYTES - 1)
+    ldi tmp0, DOMAIN_BITS
+    ld tmp1, X
+    eor tmp0, tmp1                  ; flip DOMAIN_BITS in the last state byte
+    st X, tmp0
+ret
+
+; require ZH:ZL be the address of the destination
+EXTRACT_from_State:
+    ldi XH, hi8(SRAM_STATE)
+    ldi XL, lo8(SRAM_STATE)
+    mov tmp1, rate                  ; copy the first rate bytes of the state to Z
+EXTRACT_from_State_loop:
+    ld tmp0, X+
+    st Z+, tmp0
+    dec tmp1
+    brne EXTRACT_from_State_loop
+ret
+
+AUTH:
+    tst radlen
+    breq AUTH_end                   ; nothing to absorb: state is left unchanged
+
+    cp radlen, rate
+    brlo auth_ad_padded_block
+
+auth_ad_loop:
+    rcall XOR_to_State
+    rcall Permutation
+    sub radlen, rate
+    cp radlen, rate
+    brlo auth_ad_padded_block
+    rjmp auth_ad_loop
+
+auth_ad_padded_block:
+    mov cnt0, radlen                ; remaining bytes, possibly 0 after a run of full blocks
+    rcall Pad_XOR_to_State
+    rcall Permutation
+
+AUTH_end:
+ret
+
+#ifdef CRYPTO_AEAD
+Initialization:
+    ldi rn, NR_0
+    ldi XL, lo8(SRAM_STATE)
+    ldi XH, hi8(SRAM_STATE)
+
+    lds YH, SRAM_NONCE_ADDR
+    lds YL, SRAM_NONCE_ADDR + 1
+    ldi cnt0, CRYPTO_NPUBBYTES
+load_nonce_loop:
+    ld tmp0, Y+                     ; state starts with the nonce ...
+    st X+, tmp0
+    dec cnt0
+    brne load_nonce_loop
+
+    lds YH, SRAM_KEY_ADDR
+    lds YL, SRAM_KEY_ADDR + 1
+    ldi cnt0, CRYPTO_KEYBYTES
+load_key_loop:
+    ld tmp0, Y+                     ; ... followed by the key
+    st X+, tmp0
+    dec cnt0
+    brne load_key_loop
+
+#if (STATE_INBITS==384) && (RATE_INBITS==192)
+    ldi cnt0, (STATE_INBYTES - CRYPTO_NPUBBYTES - CRYPTO_KEYBYTES - 1)
+    clr tmp0
+empty_state_loop:
+    st X+, tmp0
+    dec cnt0
+    brne empty_state_loop
+    ldi tmp0, S384_R192_BITS
+    st X+, tmp0
+#endif
+
+    rcall Permutation
+ret
+
+ENC:
+    tst mclen
+    breq ENC_end                    ; empty message: no block and no padding is absorbed
+
+    cp mclen, rate
+    brlo enc_padded_block
+
+enc_loop:
+    rcall XOR_to_State
+    ldi rn, NR_i
+    rcall Permutation
+    sub mclen, rate
+    cp mclen, rate
+    brlo enc_padded_block
+    rjmp enc_loop
+
+enc_padded_block:
+    mov cnt0, mclen
+    rcall Pad_XOR_to_State          ; no permutation here; Finalization runs the next one
+ENC_end:
+ret
+
+Finalization:
+    ldi rate, SQUEEZE_RATE_INBYTES
+    ldi rn, NR_f
+    rcall Permutation
+    rcall EXTRACT_from_State        ; writes the tag through Z
+ret
+
+; void crypto_aead_encrypt_asm(
+; unsigned char *c,
+; const unsigned char *m,
+; unsigned char mlen,
+; const unsigned char *ad,
+; unsigned char adlen,
+; const unsigned char *npub,
+; const unsigned char *k
+; )
+;
+; unsigned char *c, is passed in r24:r25
+; const unsigned char *m, is passed in r22:r23
+; unsigned char mlen, is passed in r20:r21, only LSB (r20) is used
+; const unsigned char *ad, is passed in r18:r19
+; unsigned char adlen, is passed in r16:r17, only LSB (r16) is used
+; const unsigned char *npub, is passed in r14:r15
+; const unsigned char *k is passed in r12:r13
+.global crypto_aead_encrypt_asm
+crypto_aead_encrypt_asm:
+    PUSH_ALL
+    ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
+    ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
+    st X+, r25 ;store cipher address in SRAM_MESSAGE_OUT_ADDR
+    st X+, r24
+    st X+, r23 ;store message address in SRAM_MESSAGE_IN_ADDR
+    st X+, r22
+    st X+, r21 ;store message length in SRAM_MESSAGE_IN_LEN
+    st X+, r20
+    st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR
+    st X+, r18
+    st X+, r17 ;store associated data length in SRAM_ADLEN
+    st X+, r16
+    st X+, r15 ;store nonce address in SRAM_NONCE_ADDR
+    st X+, r14
+    st X+, r13 ;store key address in SRAM_KEY_ADDR
+    st X+, r12
+    mov radlen, r16
+    mov mclen, r20
+
+    rcall Initialization
+
+    ldi rn, NR_i
+    ldi rate, RATE_INBYTES
+    ldi AEDH, 0b000 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
+    lds YH, SRAM_ASSOCIATED_DATA_ADDR
+    lds YL, SRAM_ASSOCIATED_DATA_ADDR + 1
+    rcall AUTH
+    rcall AddDomain
+    ldi AEDH, 0b001 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
+    lds YH, SRAM_MESSAGE_IN_ADDR
+    lds YL, SRAM_MESSAGE_IN_ADDR + 1
+    lds ZH, SRAM_MESSAGE_OUT_ADDR
+    lds ZL, SRAM_MESSAGE_OUT_ADDR + 1
+    rcall ENC
+    rcall Finalization              ; Z has advanced past the ciphertext, so the tag is appended to it
+    POP_ALL
+ret
+
+; int crypto_aead_decrypt_asm(
+; unsigned char *m,
+; const unsigned char *c,
+; unsigned char clen,
+; const unsigned char *ad,
+; unsigned char adlen,
+; const unsigned char *npub,
+; const unsigned char *k
+; )
+;
+; unsigned char *m, is passed in r24:r25
+; const unsigned char *c, is passed in r22:r23
+; unsigned char clen, is passed in r20:r21, only LSB (r20) is used; the caller in encrypt.c passes the length without the tag
+; const unsigned char *ad, is passed in r18:r19
+; unsigned char adlen, is passed in r16:r17, only LSB (r16) is used
+; const unsigned char *npub, is passed in r14:r15
+; const unsigned char *k is passed in r12:r13
+.global crypto_aead_decrypt_asm
+crypto_aead_decrypt_asm:
+    PUSH_ALL
+    ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
+    ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
+    st X+, r25 ;store message address in SRAM_MESSAGE_OUT_ADDR
+    st X+, r24
+    st X+, r23 ;store cipher address in SRAM_MESSAGE_IN_ADDR
+    st X+, r22
+    st X+, r21 ;store cipher length in SRAM_MESSAGE_IN_LEN
+    st X+, r20
+    st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR
+    st X+, r18
+    st X+, r17 ;store associated data length in SRAM_ADLEN
+    st X+, r16
+    st X+, r15 ;store nonce address in SRAM_NONCE_ADDR
+    st X+, r14
+    st X+, r13 ;store key address in SRAM_KEY_ADDR
+    st X+, r12
+    mov radlen, r16
+    mov mclen, r20
+
+    rcall Initialization
+
+    ldi rn, NR_i
+    ldi rate, RATE_INBYTES
+    ldi AEDH, 0b000 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
+    lds YH, SRAM_ASSOCIATED_DATA_ADDR
+    lds YL, SRAM_ASSOCIATED_DATA_ADDR + 1
+    rcall AUTH
+    rcall AddDomain
+    ldi AEDH, 0b011 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
+    lds YH, SRAM_MESSAGE_IN_ADDR
+    lds YL, SRAM_MESSAGE_IN_ADDR + 1
+    lds ZH, SRAM_MESSAGE_OUT_ADDR
+    lds ZL, SRAM_MESSAGE_OUT_ADDR + 1
+    rcall ENC
+
+    ldi ZH, hi8(SRAM_ADDITIONAL)    ; the computed tag goes to the scratch buffer, not to m
+    ldi ZL, lo8(SRAM_ADDITIONAL)
+    rcall Finalization
+
+    sbiw ZL, CRYPTO_ABYTES          ; rewind Z to the start of the computed tag
+    ldi cnt0, CRYPTO_ABYTES
+compare_tag:
+    ld tmp0, Z+                     ; computed tag byte
+    ld tmp1, Y+                     ; received tag byte; Y was left just past the ciphertext by ENC
+    cp tmp0, tmp1
+    brne return_tag_not_match       ; NOTE(review): early exit makes the run time depend on the first differing tag byte
+    dec cnt0
+    brne compare_tag
+    rjmp return_tag_match
+
+return_tag_not_match:
+    ldi r25, 0xFF                   ; return value 0xFFFF in r25:r24
+    ldi r24, 0xFF
+    rjmp crypto_aead_decrypt_end
+
+return_tag_match:
+    clr r25                         ; return value 0 in r25:r24
+    clr r24
+crypto_aead_decrypt_end:
+    POP_ALL
+ret
+
+; #ifdef CRYPTO_AEAD
+#endif
+
+
+#ifdef CRYPTO_HASH
+
+; void crypto_hash_asm(
+; unsigned char *out,
+; const unsigned char *in,
+; unsigned char inlen
+; )
+;
+; unsigned char *out, is passed in r24:r25
+; const unsigned char *in, is passed in r22:r23
+; unsigned char inlen, is passed in r20:r21, only LSB (r20) is used
+.global crypto_hash_asm
+crypto_hash_asm:
+    PUSH_ALL
+    ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
+    ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
+    st X+, r25 ;store digest (out) address in SRAM_MESSAGE_OUT_ADDR
+    st X+, r24
+    st X+, r23 ;store input (in) address in SRAM_MESSAGE_IN_ADDR
+    st X+, r22
+    st X+, r21 ;store input length in SRAM_MESSAGE_IN_LEN
+    st X+, r20
+    mov mclen, r20
+
+    ldi XH, hi8(SRAM_STATE)
+    ldi XL, lo8(SRAM_STATE)
+#if (STATE_INBITS==384) && (HASH_RATE_INBITS==128)
+    ldi cnt0, STATE_INBYTES - 1
+#else
+    ldi cnt0, STATE_INBYTES
+#endif
+    clr tmp0
+zero_state:
+    st X+, tmp0
+    dec cnt0
+    brne zero_state
+
+#if (STATE_INBITS==384) && (HASH_RATE_INBITS==128)
+    ldi tmp0, S384_R192_BITS
+    st X+, tmp0
+#endif
+
+    ldi rn, NR_h
+    ldi AEDH, 0b100                 ; bits 0 and 1 clear: absorb only; bit 2 set marks hash mode
+
+HASH_ABSORBING:
+    mov radlen, mclen
+    tst radlen
+    breq EMPTY_M
+    ldi rate, HASH_RATE_INBYTES
+    lds YH, SRAM_MESSAGE_IN_ADDR
+    lds YL, SRAM_MESSAGE_IN_ADDR + 1
+    rcall AUTH
+    rjmp HASH_SQUEEZING
+
+EMPTY_M:
+    ldi XH, hi8(SRAM_STATE)         ; AUTH absorbs nothing for empty input, so pad and permute here
+    ldi XL, lo8(SRAM_STATE)
+    ldi tmp0, PAD_BITS
+    ld tmp1, X
+    eor tmp1, tmp0
+    st X, tmp1
+    rcall Permutation
+
+HASH_SQUEEZING:
+    ldi rate, HASH_SQUEEZE_RATE_INBYTES
+    lds ZH, SRAM_MESSAGE_OUT_ADDR
+    lds ZL, SRAM_MESSAGE_OUT_ADDR + 1
+    ldi tcnt, CRYPTO_BYTES          ; digest bytes still to produce
+SQUEEZING_loop:
+    rcall EXTRACT_from_State
+    subi tcnt, HASH_SQUEEZE_RATE_INBYTES
+    breq HASH_SQUEEZING_end
+    rcall Permutation
+    rjmp SQUEEZING_loop
+HASH_SQUEEZING_end:
+    POP_ALL
+ret
+
+#endif
+
+
+; Byte Order In AVR 8:
+; KNOT-AEAD(128, 256, 64):
+; N[ 0] AEAD_State[ 0] | Message[ 0] Perm_row_0[0] 0 Tag[ 0]
+; N[ 1] AEAD_State[ 1] | Message[ 1] Perm_row_0[1] 0
Tag[ 1] +; N[ 2] AEAD_State[ 2] | Message[ 2] Perm_row_0[2] 0 Tag[ 2] +; N[ 3] AEAD_State[ 3] | Message[ 3] Perm_row_0[3] 0 Tag[ 3] +; N[ 4] AEAD_State[ 4] | Message[ 4] 0x01 Perm_row_0[4] 0 Tag[ 4] +; N[ 5] AEAD_State[ 5] | Message[ 5] 0x00 Perm_row_0[5] 0 Tag[ 5] +; N[ 6] AEAD_State[ 6] | Message[ 6] 0x00 Perm_row_0[6] 0 Tag[ 6] +; N[ 7] AEAD_State[ 7] | Message[ 7] 0x00 Perm_row_0[7] <<< 0 Tag[ 7] +; N[ 8] AEAD_State[ 8] | Perm_row_1[0] 1 +; N[ 9] AEAD_State[ 9] | Perm_row_1[1] 1 +; N[10] AEAD_State[10] | Perm_row_1[2] 1 +; N[11] AEAD_State[11] | Perm_row_1[3] 1 +; N[12] AEAD_State[12] | Perm_row_1[4] 1 +; N[13] AEAD_State[13] | Perm_row_1[5] 1 +; N[14] AEAD_State[14] | Perm_row_1[6] 1 +; N[15] AEAD_State[15] | Perm_row_1[7] <<< 1 +; K[ 0] AEAD_State[16] | Perm_row_2[0] 8 +; K[ 1] AEAD_State[17] | Perm_row_2[1] 8 +; K[ 2] AEAD_State[18] | Perm_row_2[2] 8 +; K[ 3] AEAD_State[19] | Perm_row_2[3] 8 +; K[ 4] AEAD_State[20] | Perm_row_2[4] 8 +; K[ 5] AEAD_State[21] | Perm_row_2[5] 8 +; K[ 6] AEAD_State[22] | Perm_row_2[6] 8 +; K[ 7] AEAD_State[23] | Perm_row_2[7] <<< 8 +; K[ 8] AEAD_State[24] | Perm_row_3[0] 25 +; K[ 9] AEAD_State[25] | Perm_row_3[1] 25 +; K[10] AEAD_State[26] | Perm_row_3[2] 25 +; K[11] AEAD_State[27] | Perm_row_3[3] 25 +; K[12] AEAD_State[28] | Perm_row_3[4] 25 +; K[13] AEAD_State[29] | Perm_row_3[5] 25 +; K[14] AEAD_State[30] | Perm_row_3[6] 25 +; K[15] AEAD_State[31] | ^0x80 Perm_row_3[7] <<< 25 +; +; +; KNOT-AEAD(128, 384, 192): +; Initalization +; N[ 0] AEAD_State[ 0] | Message[ 0] Perm_row_0[ 0] 0 Tag[ 0] +; N[ 1] AEAD_State[ 1] | Message[ 1] Perm_row_0[ 1] 0 Tag[ 1] +; N[ 2] AEAD_State[ 2] | Message[ 2] Perm_row_0[ 2] 0 Tag[ 2] +; N[ 3] AEAD_State[ 3] | Message[ 3] Perm_row_0[ 3] 0 Tag[ 3] +; N[ 4] AEAD_State[ 4] | Message[ 4] 0x01 Perm_row_0[ 4] 0 Tag[ 4] +; N[ 5] AEAD_State[ 5] | Message[ 5] 0x00 Perm_row_0[ 5] 0 Tag[ 5] +; N[ 6] AEAD_State[ 6] | Message[ 6] 0x00 Perm_row_0[ 6] 0 Tag[ 6] +; N[ 7] AEAD_State[ 7] | Message[ 7] 0x00 
Perm_row_0[ 7] 0 Tag[ 7] +; N[ 8] AEAD_State[ 8] | Message[ 8] 0x00 Perm_row_0[ 8] 0 Tag[ 8] +; N[ 9] AEAD_State[ 9] | Message[ 9] 0x00 Perm_row_0[ 9] 0 Tag[ 9] +; N[10] AEAD_State[10] | Message[10] 0x00 Perm_row_0[10] 0 Tag[10] +; N[11] AEAD_State[11] | Message[11] 0x00 Perm_row_0[11] <<< 0 Tag[11] +; N[12] AEAD_State[12] | Message[12] 0x00 Perm_row_1[ 0] 1 Tag[12] +; N[13] AEAD_State[13] | Message[13] 0x00 Perm_row_1[ 1] 1 Tag[13] +; N[14] AEAD_State[14] | Message[14] 0x00 Perm_row_1[ 2] 1 Tag[14] +; N[15] AEAD_State[15] | Message[15] 0x00 Perm_row_1[ 3] 1 Tag[15] +; K[ 0] AEAD_State[16] | Message[16] 0x00 Perm_row_1[ 4] 1 +; K[ 1] AEAD_State[17] | Message[17] 0x00 Perm_row_1[ 5] 1 +; K[ 2] AEAD_State[18] | Message[18] 0x00 Perm_row_1[ 6] 1 +; K[ 3] AEAD_State[19] | Message[19] 0x00 Perm_row_1[ 7] 1 +; K[ 4] AEAD_State[20] | Message[20] 0x00 Perm_row_1[ 8] 1 +; K[ 5] AEAD_State[21] | Message[21] 0x00 Perm_row_1[ 9] 1 +; K[ 6] AEAD_State[22] | Message[22] 0x00 Perm_row_1[10] 1 +; K[ 7] AEAD_State[23] | Message[23] 0x00 Perm_row_1[11] <<< 1 +; K[ 8] AEAD_State[24] | Perm_row_2[ 0] 8 +; K[ 9] AEAD_State[25] | Perm_row_2[ 1] 8 +; K[10] AEAD_State[26] | Perm_row_2[ 2] 8 +; K[11] AEAD_State[27] | Perm_row_2[ 3] 8 +; K[12] AEAD_State[28] | Perm_row_2[ 4] 8 +; K[13] AEAD_State[29] | Perm_row_2[ 5] 8 +; K[14] AEAD_State[30] | Perm_row_2[ 6] 8 +; K[15] AEAD_State[31] | Perm_row_2[ 7] 8 +; 0x00 AEAD_State[32] | Perm_row_2[ 8] 8 +; 0x00 AEAD_State[33] | Perm_row_2[ 9] 8 +; 0x00 AEAD_State[34] | Perm_row_2[10] 8 +; 0x00 AEAD_State[35] | Perm_row_2[11] <<< 8 +; 0x00 AEAD_State[36] | Perm_row_3[ 0] 55 +; 0x00 AEAD_State[37] | Perm_row_3[ 1] 55 +; 0x00 AEAD_State[38] | Perm_row_3[ 2] 55 +; 0x00 AEAD_State[39] | Perm_row_3[ 3] 55 +; 0x00 AEAD_State[40] | Perm_row_3[ 4] 55 +; 0x00 AEAD_State[41] | Perm_row_3[ 5] 55 +; 0x00 AEAD_State[42] | Perm_row_3[ 6] 55 +; 0x00 AEAD_State[43] | Perm_row_3[ 7] 55 +; 0x00 AEAD_State[44] | Perm_row_3[ 8] 55 +; 0x00 AEAD_State[45] | Perm_row_3[ 9] 
55
+; 0x00 AEAD_State[46] | Perm_row_3[10] 55
+; 0x00 ^0x80 AEAD_State[47] | ^0x80 Perm_row_3[11] <<< 55
diff --git a/knot/Implementations/crypto_aead/knot128v1/avr8_lowrom/knot256.h b/knot/Implementations/crypto_aead/knot128v1/avr8_lowrom/knot256.h
new file mode 100644
index 0000000..d16bf8c
--- /dev/null
+++ b/knot/Implementations/crypto_aead/knot128v1/avr8_lowrom/knot256.h
@@ -0,0 +1,197 @@
+;
+; **********************************************
+; * KNOT: a family of bit-slice lightweight *
+; * authenticated encryption algorithms *
+; * and hash functions *
+; * *
+; * Assembly implementation for 8-bit AVR CPU *
+; * Version 1.0 2020 by KNOT Team *
+; **********************************************
+; Permutation for the 256-bit state: runs rn rounds on SRAM_STATE.
+#include "assist.h"
+
+Permutation:
+    PUSH_CONFLICT
+    mov rcnt, rn                    ; round counter
+
+    ldi rc, 0x01                    ; round constant starts at 1 on every call
+    ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES)
+    ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES)
+    ld x30, Y+                      ; row 3 is kept in registers x30..x37 for all rounds
+    ld x31, Y+
+    ld x32, Y+
+    ld x33, Y+
+    ld x34, Y+
+    ld x35, Y+
+    ld x36, Y+
+    ld x37, Y+
+
+round_loop_start:
+    rjmp AddRC_SubColumns_Start
+
+load_columns_table:
+    rjmp load_column0               ; one entry per byte column; Z steps through this table via ijmp
+    rjmp load_column1
+    rjmp load_column2
+    rjmp load_column3
+    rjmp load_column4
+    rjmp load_column5
+    rjmp load_column6
+    rjmp load_column7
+    rjmp amend_shiftRow
+
+load_column0:
+    mov x3j, x30
+    rjmp Sbox_one_column
+load_column1:
+    mov x30, x3j                    ; write back the previous column result, then fetch the next row 3 byte
+    mov x3j, x31
+    rjmp Sbox_one_column
+load_column2:
+    mov x31, x3j
+    mov x3j, x32
+    rjmp Sbox_one_column
+load_column3:
+    mov x32, x3j
+    mov x3j, x33
+    rjmp Sbox_one_column
+load_column4:
+    mov x33, x3j
+    mov x3j, x34
+    rjmp Sbox_one_column
+load_column5:
+    mov x34, x3j
+    mov x3j, x35
+    rjmp Sbox_one_column
+load_column6:
+    mov x35, x3j
+    mov x3j, x36
+    rjmp Sbox_one_column
+load_column7:
+    mov x36, x3j
+    mov x3j, x37
+    rjmp Sbox_one_column
+
+#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
+LFSR_table:
+    rjmp LFSR6
+    rjmp LFSR7
+LFSR6:
+    LFSR6_MACRO
+    rjmp LFSR_DONE
+LFSR7:
+    LFSR7_MACRO
+    rjmp LFSR_DONE
+#endif
+
+;;;;;;;;;;;;;;;;;;;;;;;; Real Start
+AddRC_SubColumns_Start:
+    ldi YH, hi8(SRAM_STATE)
+    ldi YL, lo8(SRAM_STATE)
+    clr ccnt                        ; column counter
+    ld x0j, Y
+    eor x0j, rc                     ; add the round constant to the first byte of row 0
+
+#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
+    ldi ZL, pm_lo8(LFSR_table)
+    ldi ZH, pm_hi8(LFSR_table)
+    sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[2] = 1 for HASH
+    adiw ZL, 1
+    ijmp
+LFSR_DONE:
+#elif defined(CRYPTO_AEAD)
+    LFSR6_MACRO ; only AEAD
+#else
+    LFSR7_MACRO ; only HASH
+#endif
+
+    ldd x1j, Y + ROW_INBYTES
+    ldd x2j, Y + 2 * ROW_INBYTES
+    ldi ZL, pm_lo8(load_columns_table)
+    ldi ZH, pm_hi8(load_columns_table)
+    ijmp
+Sbox_one_column:
+    Sbox x0j, x1j, x2j, x3j
+
+    ; 7 6 5 4 3 2 1 0
+    ; -- -- -- -- -- -- -- x- 0
+    ; -- -- -- -- -- -- -- x' 0
+    ; -- -- -- -- -- -- x- -- 1
+    ; -- -- -- -- x' -- -- -- 3
+    ; 4 3 2 1 0 7 6 5
+    ; Store a byte to Row 0
+    st Y, x0j
+    ; Store a byte combined with ShiftRow1
+    lsl t1j                         ; top bit of the previous row 1 byte goes into carry
+    mov t1j, x1j ; back up the last updated byte in t1j, to be used in shiftRow1 (1 bit left)
+    rol x1j
+    std Y + ROW_INBYTES, x1j
+    ; Store a byte combined with ShiftRow2
+    inc ccnt
+    cpi ccnt, ROW_INBYTES
+    breq ROW2_WRAP
+    ldd t2j, Y + 2 * ROW_INBYTES + 1 ; load next byte, the last updated byte needed to be shifted to the address of the next bytes
+    std Y + 2 * ROW_INBYTES + 1, x2j
+    mov x2j, t2j
+    jmp NO_ROW2_WRAP                ; NOTE(review): absolute jmp where the rest of the file uses rjmp - confirm this is intended
+ROW2_WRAP:
+    std Y + ROW_INBYTES + 1, x2j    ; last column wraps around to the first byte of row 2
+    ; remain ShiftRow3 to be done at 'amend_shiftRow'
+NO_ROW2_WRAP:
+    adiw YL, 1
+    ld x0j, Y
+    ldd x1j, Y + ROW_INBYTES
+
+    adiw ZL, 1                      ; next entry of load_columns_table
+    ijmp
+
+amend_shiftRow:
+    ldi YH, hi8(SRAM_STATE + ROW_INBYTES)
+    ldi YL, lo8(SRAM_STATE + ROW_INBYTES)
+
+    ld x1j, Y
+    bst t1j, 7                      ; top bit of the last row 1 byte wraps into bit 0 of the first
+    bld x1j, 0
+    st Y, x1j
+
+    ; <<< 1
+    mov x37, x3j
+    rol x3j
+    rol x30
+    rol x31
+    rol x32
+    rol x33
+    rol x34
+    rol x35
+    rol x36
+    rol x37
+    ; <<< 24
+    ; 7 6 5 4 3 2 1 0 => 4 3 2 1 0 7 6 5
+    mov x3j, x30
+    mov x30, x35
+    mov x35, x32
+    mov x32, x37
+    mov x37, x34
+    mov x34, x31
+    mov x31, x36
+    mov x36, x33
+    mov x33, x3j
+
+    dec rcnt
+    breq round_loop_end
+    rjmp round_loop_start
+
+round_loop_end:
+    ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES)
+    ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES)
+    st Y+, x30                      ; write row 3 back to SRAM
+    st Y+, x31
+    st Y+, x32
+    st Y+, x33
+    st Y+, x34
+    st Y+, x35
+    st Y+, x36
+    st Y+, x37
+
+    POP_CONFLICT
+ret
\ No newline at end of file
diff --git a/knot/Implementations/crypto_aead/knot128v1/avr8_lowrom/knot384.h b/knot/Implementations/crypto_aead/knot128v1/avr8_lowrom/knot384.h
new file mode 100644
index 0000000..65c474a
--- /dev/null
+++ b/knot/Implementations/crypto_aead/knot128v1/avr8_lowrom/knot384.h
@@ -0,0 +1,219 @@
+;
+; **********************************************
+; * KNOT: a family of bit-slice lightweight *
+; * authenticated encryption algorithms *
+; * and hash functions *
+; * *
+; * Assembly implementation for 8-bit AVR CPU *
+; * Version 1.0 2020 by KNOT Team *
+; **********************************************
+; Permutation for the 384-bit state: runs rn rounds on SRAM_STATE.
+#include "assist.h"
+
+Permutation:
+    PUSH_CONFLICT
+    mov rcnt, rn                    ; round counter
+
+    ldi rc, 0x01                    ; round constant starts at 1 on every call
+    ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES)
+    ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES)
+    ld x30, Y+                      ; row 3 is kept in registers x30..x3b for all rounds
+    ld x31, Y+
+    ld x32, Y+
+    ld x33, Y+
+    ld x34, Y+
+    ld x35, Y+
+    ld x36, Y+
+    ld x37, Y+
+    ld x38, Y+
+    ld x39, Y+
+    ld x3a, Y+
+    ld x3b, Y+
+
+round_loop_start:
+    rjmp AddRC_SubColumns_Start
+
+load_columns_table:
+    rjmp load_column0               ; one entry per byte column; Z steps through this table via ijmp
+    rjmp load_column1
+    rjmp load_column2
+    rjmp load_column3
+    rjmp load_column4
+    rjmp load_column5
+    rjmp load_column6
+    rjmp load_column7
+    rjmp load_column8
+    rjmp load_column9
+    rjmp load_columna
+    rjmp load_columnb
+    rjmp amend_shiftRow
+
+load_column0:
+    mov x3j, x30
+    rjmp Sbox_one_column
+load_column1:
+    mov x30, x3j                    ; write back the previous column result, then fetch the next row 3 byte
+    mov x3j, x31
+    rjmp Sbox_one_column
+load_column2:
+    mov x31, x3j
+    mov x3j, x32
+    rjmp Sbox_one_column
+load_column3:
+    mov x32, x3j
+    mov x3j, x33
+    rjmp Sbox_one_column
+load_column4:
+    mov x33, x3j
+    mov x3j, x34
+    rjmp Sbox_one_column
+load_column5:
+    mov x34, x3j
+    mov x3j, x35
+    rjmp Sbox_one_column
+load_column6:
+    mov x35, x3j
+    mov x3j, x36
+    rjmp Sbox_one_column
+load_column7:
+    mov x36, x3j
+    mov x3j, x37
+    rjmp Sbox_one_column
+load_column8:
+    mov x37, x3j
+    mov x3j, x38
+    rjmp Sbox_one_column
+load_column9:
+    mov x38, x3j
+    mov x3j, x39
+    rjmp Sbox_one_column
+load_columna:
+    mov x39, x3j
+    mov x3j, x3a
+    rjmp Sbox_one_column
+load_columnb:
+    mov x3a, x3j
+    mov x3j, x3b
+    rjmp Sbox_one_column
+
+;;;;;;;;;;;;;;;;;;;;;;;; Real Start
+AddRC_SubColumns_Start:
+    ldi YH, hi8(SRAM_STATE)
+    ldi YL, lo8(SRAM_STATE)
+    ldi ZL, pm_lo8(load_columns_table)
+    ldi ZH, pm_hi8(load_columns_table)
+    clr ccnt                        ; column counter
+    ld x0j, Y
+    eor x0j, rc                     ; add the round constant to the first byte of row 0
+    LFSR7_MACRO
+
+    ldd x1j, Y + ROW_INBYTES
+    ldd x2j, Y + 2 * ROW_INBYTES
+    ijmp
+Sbox_one_column:
+    Sbox x0j, x1j, x2j, x3j
+
+    ; b a 9 8 7 6 5 4 3 2 1 0
+    ; -- -- -- -- -- -- -- -- -- -- -- x- 0
+    ; -- -- -- -- -- -- -- -- -- -- -- x' 0
+    ; -- -- -- -- -- -- -- -- -- -- x- -- 1
+    ; -- -- -- -- x' -- -- -- -- -- -- -- 7
+    ; 4 3 2 1 0 b a 9 8 7 6 5
+    ; Store a byte to Row 0
+    st Y, x0j
+    ; Store a byte combined with ShiftRow 1
+    lsl t1j                         ; top bit of the previous row 1 byte goes into carry
+    mov t1j, x1j ; back up the last updated byte in t1j, to be used in shiftRow1 (1 bit left)
+    rol x1j
+    std Y + ROW_INBYTES, x1j
+    ; Store a byte combined with ShiftRow 2
+    inc ccnt
+    cpi ccnt, ROW_INBYTES
+    breq ROW2_WRAP
+    ldd t2j, Y + 2 * ROW_INBYTES + 1 ; load next byte, the last updated byte needed to be shifted to the address of the next bytes
+    std Y + 2 * ROW_INBYTES + 1, x2j
+    mov x2j, t2j
+    jmp NO_ROW2_WRAP                ; NOTE(review): absolute jmp where the rest of the file uses rjmp - confirm this is intended
+ROW2_WRAP:
+    std Y + ROW_INBYTES + 1, x2j    ; last column wraps around to the first byte of row 2
+    ; remain ShiftRow3 to be done at 'amend_shiftRow'
+NO_ROW2_WRAP:
+    adiw YL, 1
+    ld x0j, Y
+    ldd x1j, Y + ROW_INBYTES
+
+    adiw ZL, 1                      ; next entry of load_columns_table
+    ijmp
+
+amend_shiftRow:
+    ldi YH, hi8(SRAM_STATE + ROW_INBYTES)
+    ldi YL, lo8(SRAM_STATE + ROW_INBYTES)
+
+    ld x1j, Y
+    bst t1j, 7                      ; top bit of the last row 1 byte wraps into bit 0 of the first
+    bld x1j, 0
+    st Y, x1j
+
+    ; >>> 1
+    mov x3b, x3j
+    ror x3j
+    ror x3a
+    ror x39
+    ror x38
+    ror x37
+    ror x36
+    ror x35
+    ror x34
+    ror x33
+    ror x32
+    ror x31
+    ror x30
+    ror x3b
+    ; <<< 56
+    ; b a 9 8 7 6 5 4 3 2 1 0 => 4 3 2 1 0 b a 9 8 7 6 5
+    ;mov x3j, x30
+    ;mov x30, x35
+    ;mov x35, x32
+    ;mov x32, x37
+    ;mov x37, x34
+    ;mov x34, x31
+    ;mov x31, x36
+    ;mov x36, x33
+    ;mov x33, x3j
+    mov x3j, x30
+    mov x30, x35
+    mov x35, x3a
+    mov x3a, x33
+    mov x33, x38
+    mov x38, x31
+    mov x31, x36
+    mov x36, x3b
+    mov x3b, x34
+    mov x34, x39
+    mov x39, x32
+    mov x32, x37
+    mov x37, x3j
+
+    dec rcnt
+    breq round_loop_end
+    rjmp round_loop_start
+
+round_loop_end:
+
+    ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES)
+    ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES)
+    st Y+, x30                      ; write row 3 back to SRAM
+    st Y+, x31
+    st Y+, x32
+    st Y+, x33
+    st Y+, x34
+    st Y+, x35
+    st Y+, x36
+    st Y+, x37
+    st Y+, x38
+    st Y+, x39
+    st Y+, x3a
+    st Y+, x3b
+
+    POP_CONFLICT
+ret
\ No newline at end of file
diff --git a/knot/Implementations/crypto_aead/knot128v1/avr8_lowrom/knot512.h b/knot/Implementations/crypto_aead/knot128v1/avr8_lowrom/knot512.h
new file mode 100644
index 0000000..d24b353
--- /dev/null
+++ b/knot/Implementations/crypto_aead/knot128v1/avr8_lowrom/knot512.h
@@ -0,0 +1,275 @@
+;
+; **********************************************
+; * KNOT: a family of bit-slice lightweight *
+; * authenticated encryption algorithms *
+; * and hash functions *
+; * *
+; * Assembly implementation for 8-bit AVR CPU *
+; * Version 1.0 2020 by KNOT Team *
+; **********************************************
+;
+#include "assist.h"
+
+Permutation:
+    PUSH_CONFLICT
+    mov rcnt, rn
+
+    ldi rc, 0x01
+    ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES)
+    ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES)
+    ld x30, Y+
+    ld x31, Y+
+    ld x32, Y+
+    ld x33, Y+
+    ld x34, Y+
+    ld x35, Y+
+    ld x36, Y+
+    ld x37, Y+
+    ld x38, Y+
+    ld x39, Y+
+    ld x3a, Y+
+    ld x3b, Y+
+    ld x3c, Y+
+    ld x3d, Y+
+    ld x3e, Y+
+    ld x3f, Y+
+
+round_loop_start:
+    rjmp AddRC_SubColumns_Start
+
+load_columns_table:
+    rjmp load_column0
+    rjmp load_column1
+    rjmp load_column2
+    rjmp load_column3
+    rjmp load_column4
+    rjmp load_column5
+    rjmp load_column6
+    rjmp load_column7
+    rjmp load_column8
+    rjmp load_column9
+    rjmp load_columna
+    rjmp load_columnb
+    rjmp load_columnc
+    rjmp
load_columnd + rjmp load_columne + rjmp load_columnf + rjmp amend_shiftRow + +load_column0: + mov x3j, x30 + rjmp Sbox_one_column +load_column1: + mov x30, x3j + mov x3j, x31 + rjmp Sbox_one_column +load_column2: + mov x31, x3j + mov x3j, x32 + rjmp Sbox_one_column +load_column3: + mov x32, x3j + mov x3j, x33 + rjmp Sbox_one_column +load_column4: + mov x33, x3j + mov x3j, x34 + rjmp Sbox_one_column +load_column5: + mov x34, x3j + mov x3j, x35 + rjmp Sbox_one_column +load_column6: + mov x35, x3j + mov x3j, x36 + rjmp Sbox_one_column +load_column7: + mov x36, x3j + mov x3j, x37 + rjmp Sbox_one_column +load_column8: + mov x37, x3j + mov x3j, x38 + rjmp Sbox_one_column +load_column9: + mov x38, x3j + mov x3j, x39 + rjmp Sbox_one_column +load_columna: + mov x39, x3j + mov x3j, x3a + rjmp Sbox_one_column +load_columnb: + mov x3a, x3j + mov x3j, x3b + rjmp Sbox_one_column +load_columnc: + mov x3b, x3j + mov x3j, x3c + rjmp Sbox_one_column +load_columnd: + mov x3c, x3j + mov x3j, x3d + rjmp Sbox_one_column +load_columne: + mov x3d, x3j + mov x3j, x3e + rjmp Sbox_one_column +load_columnf: + mov x3e, x3j + mov x3j, x3f + rjmp Sbox_one_column + +#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) +LFSR_table: + rjmp LFSR7 + rjmp LFSR8 +LFSR7: + LFSR7_MACRO + rjmp LFSR_DONE +LFSR8: + LFSR8_MACRO + rjmp LFSR_DONE +#endif + +;;;;;;;;;;;;;;;;;;;;;;;; Real Start +AddRC_SubColumns_Start: + ldi YH, hi8(SRAM_STATE) + ldi YL, lo8(SRAM_STATE) + clr ccnt + ld x0j, Y + eor x0j, rc + +#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) + ldi ZL, pm_lo8(LFSR_table) + ldi ZH, pm_hi8(LFSR_table) + sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[2] = 1 for HASH + adiw ZL, 1 + ijmp +LFSR_DONE: +#elif defined(CRYPTO_AEAD) + LFSR7_MACRO ; only AEAD +#else + LFSR8_MACRO ; only HASH +#endif + + ldd x1j, Y + ROW_INBYTES + ldd x2j, Y + 2 * ROW_INBYTES + ldd t2j, Y + 2 * ROW_INBYTES + 1 + ldi ZL, pm_lo8(load_columns_table) + ldi ZH, pm_hi8(load_columns_table) + ijmp +Sbox_one_column: + Sbox x0j, x1j, x2j, x3j
+ + ; f e d c b a 9 8 7 6 5 4 3 2 1 0 + ; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x- 0 + ; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x' 0 + ; -- -- -- -- -- -- -- -- -- -- -- -- -- x- -- -- 2 + ; -- -- -- -- -- -- -- -- -- -- -- -- x' -- -- -- 3 + ; c b a 9 8 7 6 5 4 3 2 1 0 f e d + ; Store a byte to Row 0 + st Y, x0j + ; Store a byte combined with ShiftRow1 + lsl t1j + mov t1j, x1j ; back up the last updated byte in t1j, to be used in shiftRow1 (1 bit left) + rol x1j + std Y + ROW_INBYTES, x1j + ; Store a byte combined with ShiftRow2 + inc ccnt + cpi ccnt, ROW_INBYTES - 1 + brsh ROW2_WRAP + ldd tmp0, Y + 2 * ROW_INBYTES + 2 ; load next byte, the last updated byte needed to be shifted to the address of the next bytes + std Y + 2 * ROW_INBYTES + 2, x2j + mov x2j, t2j + mov t2j, tmp0 + jmp NO_ROW2_WRAP +ROW2_WRAP: + std Y + ROW_INBYTES + 2, x2j + mov x2j, t2j + + ; remain ShiftRow3 to be done at 'amend_shiftRow' +NO_ROW2_WRAP: + adiw YL, 1 + ld x0j, Y + ldd x1j, Y + ROW_INBYTES + + adiw ZL, 1 + ijmp + +amend_shiftRow: + ldi YH, hi8(SRAM_STATE + ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + ROW_INBYTES) + + ld x1j, Y + bst t1j, 7 + bld x1j, 0 + st Y, x1j + + ; <<< 1 + mov x3f, x3j + rol x3j + rol x30 + rol x31 + rol x32 + rol x33 + rol x34 + rol x35 + rol x36 + rol x37 + rol x38 + rol x39 + rol x3a + rol x3b + rol x3c + rol x3d + rol x3e + rol x3f + ; <<< 24 + ; f e d c b a 9 8 7 6 5 4 3 2 1 0 => + ; c b a 9 8 7 6 5 4 3 2 1 0 f e d + mov x3j, x30 + mov x30, x3d + mov x3d, x3a + mov x3a, x37 + mov x37, x34 + mov x34, x31 + mov x31, x3e + mov x3e, x3b + mov x3b, x38 + mov x38, x35 + mov x35, x32 + mov x32, x3f + mov x3f, x3c + mov x3c, x39 + mov x39, x36 + mov x36, x33 + mov x33, x3j + + dec rcnt + breq round_loop_end + rjmp round_loop_start + +round_loop_end: + + ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES) + st Y+, x30 + st Y+, x31 + st Y+, x32 + st Y+, x33 + st Y+, x34 + st Y+, x35 + st Y+, x36 + st Y+, x37 + st Y+, x38 + st 
Y+, x39 + st Y+, x3a + st Y+, x3b + st Y+, x3c + st Y+, x3d + st Y+, x3e + st Y+, x3f + + POP_CONFLICT +ret \ No newline at end of file diff --git a/knot/Implementations/crypto_aead/knot128v1/avr8_lowrom/permutation.h b/knot/Implementations/crypto_aead/knot128v1/avr8_lowrom/permutation.h new file mode 100644 index 0000000..a57c5d3 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v1/avr8_lowrom/permutation.h @@ -0,0 +1,109 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.0 2020 by KNOT Team * +; ********************************************** +; + +; +; ============================================ +; R E G I S T E R D E F I N I T I O N S +; ============================================ +; + +#define mclen r16 +#define radlen r17 +#define tcnt r17 +#define tmp0 r20 +#define tmp1 r21 +#define cnt0 r22 +#define rn r23 +#define rate r24 + +; +; ; AEDH = 0b000: for authenticate AD +; ; AEDH = 0b001: for encryption +; ; AEDH = 0b011: for decryption +; ; AEDH = 0b100: for hash +; #define AEDH r25 ; Register used globally within this program +; +; #define x30 r0 ; Register used without overlapping +; #define x31 r1 ; Register used without overlapping +; #define x32 r2 ; Register used without overlapping +; #define x33 r3 ; Register used without overlapping +; #define x34 r4 ; Register used without overlapping +; #define x35 r5 ; Register used without overlapping +; #define x36 r6 ; Register used without overlapping +; #define x37 r7 ; Register used without overlapping +; #define x38 r8 ; Register used without overlapping +; #define x39 r9 ; Register used without overlapping +; #define x3a r10 ; Register used without overlapping +; #define x3b r11 ; Register used without overlapping +; #define x3c r12 ; Register used without overlapping +; #define x3d r13 ; Register used 
without overlapping +; #define x3e r14 ; Register used without overlapping +; #define x3f r15 ; Register used without overlapping +; +; #define x0j r16 ; Register used overlapped, should be backed up before using +; #define x1j r17 ; Register used overlapped, should be backed up before using +; #define x2j r18 ; Register used overlapped, should be backed up before using +; #define x3j r19 ; Register used overlapped, should be backed up before using +; +; ; t2j used in knot512 to keep one byte in Row2 (because of rotating 16-bit), +; ; will not interfere with LFSR, which uses the overlapped register tmp1 +; #define t2j r21 ; Temporary register, used freely +; #define t1j r22 ; Temporary register, used freely +; #define t3j r23 ; Temporary register, used freely +; +; #define rc r24 ; Register used overlapped, should be backed up before using +; #define rcnt r26 ; Register used overlapped, should be backed up before using +; #define ccnt r27 ; Register used overlapped, should be backed up before using + +#define AEDH r25 +#define x30 r0 +#define x31 r1 +#define x32 r2 +#define x33 r3 +#define x34 r4 +#define x35 r5 +#define x36 r6 +#define x37 r7 +#define x38 r8 +#define x39 r9 +#define x3a r10 +#define x3b r11 +#define x3c r12 +#define x3d r13 +#define x3e r14 +#define x3f r15 + +#define x0j r16 +#define x1j r17 +#define x2j r18 +#define x3j r19 + +; t2j used in knot512 to keep one byte in Row2 (because of rotating 16-bit), +; will not interfere with LFSR, which uses the overlapped register tmp1 +#define t2j r21 +#define t1j r22 +#define t3j r23 + +#define rc r24 +#define rcnt r26 +#define ccnt r27 + +#if (STATE_INBITS==256) +#include "knot256.h" +#elif (STATE_INBITS==384) +#include "knot384.h" +#elif (STATE_INBITS==512) +#include "knot512.h" +#else +#error "Not specified key size and state size" +#endif + + diff --git a/knot/Implementations/crypto_aead/knot128v2/avr8_lowrom/api.h b/knot/Implementations/crypto_aead/knot128v2/avr8_lowrom/api.h new file mode 100644
index 0000000..51fc844 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/avr8_lowrom/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 \ No newline at end of file diff --git a/knot/Implementations/crypto_aead/knot128v2/avr8_lowrom/assist.h b/knot/Implementations/crypto_aead/knot128v2/avr8_lowrom/assist.h new file mode 100644 index 0000000..cb903a5 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/avr8_lowrom/assist.h @@ -0,0 +1,140 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.0 2020 by KNOT Team * +; ********************************************** +; +.macro LFSR6_MACRO + bst rc, 5 + bld tmp0, 0 + bst rc, 4 + bld tmp1, 0 + eor tmp0, tmp1 + ror tmp0 + rol rc + andi rc, 0x3F +.endm + +.macro LFSR7_MACRO + bst rc, 6 + bld tmp0, 0 + bst rc, 5 + bld tmp1, 0 + eor tmp0, tmp1 + ror tmp0 + rol rc + andi rc, 0x7F +.endm + +.macro LFSR8_MACRO + bst rc, 7 + bld tmp0, 0 + bst rc, 5 + bld tmp1, 0 + eor tmp0, tmp1 + bst rc, 4 + bld tmp1, 0 + eor tmp0, tmp1 + bst rc, 3 + bld tmp1, 0 + eor tmp0, tmp1 + ror tmp0 + rol rc +.endm + +.macro Sbox i0, i1, i2, i3 + mov tmp0, \i1 + com \i0 + and \i1, \i0 + eor \i1, \i2 + or \i2, tmp0 + eor \i0, \i3 + eor \i2, \i0 + eor tmp0, \i3 + and \i0, \i1 + eor \i3, \i1 + eor \i0, tmp0 + and tmp0, \i2 + eor \i1, tmp0 +.endm + +.macro PUSH_CONFLICT + push r16 + push r17 + push r18 + push r19 + + push r23 + push r24 + + push r26 + push r27 + push r28 + push r29 + push r30 + push r31 +.endm + +.macro POP_CONFLICT + pop r31 + pop r30 + pop r29 + pop r28 + pop r27 + pop r26 + + pop r24 + pop r23 + + pop r19 + pop r18 + pop r17 + pop r16 +.endm + +.macro PUSH_ALL + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push 
r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r28 + push r29 +.endm + +.macro POP_ALL + pop r29 + pop r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + clr r1 +.endm \ No newline at end of file diff --git a/knot/Implementations/crypto_aead/knot128v2/avr8_lowrom/config.h b/knot/Implementations/crypto_aead/knot128v2/avr8_lowrom/config.h new file mode 100644 index 0000000..98114a9 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/avr8_lowrom/config.h @@ -0,0 +1,131 @@ +#ifndef __CONFIG_H__ +#define __CONFIG_H__ + +#define CRYPTO_AEAD +//#define CRYPTO_HASH + +#define MAX_MESSAGE_LENGTH 128 + +#define STATE_INBITS 384 +/* For CRYPTO_AEAD */ +#define CRYPTO_KEYBITS 128 +/* For CRYPTO_HASH */ +#define CRYPTO_BITS 256 + +#define STATE_INBYTES ((STATE_INBITS + 7) / 8) +#define ROW_INBITS ((STATE_INBITS + 3) / 4) +#define ROW_INBYTES ((ROW_INBITS + 7) / 8) + +/* For CRYPTO_AEAD */ +#define CRYPTO_KEYBYTES ((CRYPTO_KEYBITS + 7) / 8) +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES CRYPTO_KEYBYTES +#define CRYPTO_ABYTES CRYPTO_KEYBYTES +#define CRYPTO_NOOVERLAP 1 + +#define MAX_ASSOCIATED_DATA_LENGTH 32 +#define MAX_CIPHER_LENGTH (MAX_MESSAGE_LENGTH + CRYPTO_ABYTES) + +#define TAG_MATCH 0 +#define TAG_UNMATCH -1 +#define OTHER_FAILURES -2 + +/* For CRYPTO_HASH */ +#define CRYPTO_BYTES ((CRYPTO_BITS + 7) / 8) + + + +#define DOMAIN_BITS 0x80 +#define PAD_BITS 0x01 +#define S384_R192_BITS 0x80 + +#if (STATE_INBITS==256) +#define C1 1 +#define C2 8 +#define C3 25 +#elif (STATE_INBITS==384) +#define C1 1 +#define C2 8 +#define C3 55 +#elif (STATE_INBITS==512) +#define C1 1 +#define C2 16 +#define C3 25 +#else +#error "Not specified state size" +#endif + +#ifdef CRYPTO_AEAD +/* For CRYPTO_AEAD */ +#define KEY_INBITS (CRYPTO_KEYBYTES * 8) +#define KEY_INBYTES (CRYPTO_KEYBYTES) + +#define 
NONCE_INBITS (CRYPTO_NPUBBYTES * 8) +#define NONCE_INBYTES (CRYPTO_NPUBBYTES) + +#define TAG_INBITS (CRYPTO_ABYTES * 8) +#define TAG_INBYTES (CRYPTO_ABYTES) + +#if (KEY_INBITS==128) && (STATE_INBITS==256) +#define RATE_INBITS 64 +#define NR_0 52 +#define NR_i 28 +#define NR_f 32 +#elif (KEY_INBITS==128) && (STATE_INBITS==384) +#define RATE_INBITS 192 +#define NR_0 76 +#define NR_i 28 +#define NR_f 32 +#elif (KEY_INBITS==192) && (STATE_INBITS==384) +#define RATE_INBITS 96 +#define NR_0 76 +#define NR_i 40 +#define NR_f 44 +#elif (KEY_INBITS==256) && (STATE_INBITS==512) +#define RATE_INBITS 128 +#define NR_0 100 +#define NR_i 52 +#define NR_f 56 +#else +#error "Not specified key size and state size" +#endif + +#define RATE_INBYTES ((RATE_INBITS + 7) / 8) +#define SQUEEZE_RATE_INBYTES TAG_INBYTES + +#endif + +#ifdef CRYPTO_HASH +/* For CRYPTO_HASH */ +#define HASH_DIGEST_INBITS (CRYPTO_BYTES * 8) + +#if (HASH_DIGEST_INBITS==256) && (STATE_INBITS==256) +#define HASH_RATE_INBITS 32 +#define HASH_SQUEEZE_RATE_INBITS 128 +#define NR_h 68 +#elif (HASH_DIGEST_INBITS==256) && (STATE_INBITS==384) +#define HASH_RATE_INBITS 128 +#define HASH_SQUEEZE_RATE_INBITS 128 +#define NR_h 80 +#elif (HASH_DIGEST_INBITS==384) && (STATE_INBITS==384) +#define HASH_RATE_INBITS 48 +#define HASH_SQUEEZE_RATE_INBITS 192 +#define NR_h 104 +#elif (HASH_DIGEST_INBITS==512) && (STATE_INBITS==512) +#define HASH_RATE_INBITS 64 +#define HASH_SQUEEZE_RATE_INBITS 256 +#define NR_h 140 +#else +#error "Not specified hash digest size and state size" +#endif + +#define HASH_RATE_INBYTES ((HASH_RATE_INBITS + 7) / 8) +#define HASH_SQUEEZE_RATE_INBYTES ((HASH_SQUEEZE_RATE_INBITS + 7) / 8) + +#endif + +#define TAG_MATCH 0 +#define TAG_UNMATCH -1 +#define OTHER_FAILURES -2 + +#endif \ No newline at end of file diff --git a/knot/Implementations/crypto_aead/knot128v2/avr8_lowrom/crypto_aead.h b/knot/Implementations/crypto_aead/knot128v2/avr8_lowrom/crypto_aead.h new file mode 100644 index 0000000..cd820d3 --- 
/dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/avr8_lowrom/crypto_aead.h @@ -0,0 +1,26 @@ +#ifdef __cplusplus +extern "C" { +#endif + +int crypto_aead_encrypt( + unsigned char *c,unsigned long long *clen, + const unsigned char *m,unsigned long long mlen, + const unsigned char *ad,unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k + ); + + +int crypto_aead_decrypt( + unsigned char *m,unsigned long long *outputmlen, + unsigned char *nsec, + const unsigned char *c,unsigned long long clen, + const unsigned char *ad,unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k + ); + +#ifdef __cplusplus +} +#endif diff --git a/knot/Implementations/crypto_aead/knot128v2/avr8_lowrom/encrypt.c b/knot/Implementations/crypto_aead/knot128v2/avr8_lowrom/encrypt.c new file mode 100644 index 0000000..baf0a3b --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/avr8_lowrom/encrypt.c @@ -0,0 +1,106 @@ +#include +#include +#include +#include +#include "config.h" + +extern void crypto_aead_encrypt_asm( + unsigned char *c, + const unsigned char *m, + unsigned char mlen, + const unsigned char *ad, + unsigned char adlen, + const unsigned char *npub, + const unsigned char *k + ); + +extern int crypto_aead_decrypt_asm( + unsigned char *m, + const unsigned char *c, + unsigned char clen, + const unsigned char *ad, + unsigned char adlen, + const unsigned char *npub, + const unsigned char *k + ); + +extern void crypto_hash_asm( + unsigned char *out, + const unsigned char *in, + unsigned char inlen + ); + + +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k + ) +{ + /* + ... + ... the code for the cipher implementation goes here, + ... generating a ciphertext c[0],c[1],...,c[*clen-1] + ... 
from a plaintext m[0],m[1],...,m[mlen-1] + ... and associated data ad[0],ad[1],...,ad[adlen-1] + ... and nonce npub[0],npub[1],.. + ... and secret key k[0],k[1],... + ... the implementation shall not use nsec + ... + ... return 0; + */ + + (void)nsec; + + crypto_aead_encrypt_asm(c, m, mlen, ad, adlen, npub, k); + + *clen = mlen + TAG_INBYTES; + return 0; +} + + + +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k + ) +{ + /* + ... + ... the code for the AEAD implementation goes here, + ... generating a plaintext m[0],m[1],...,m[*mlen-1] + ... and secret message number nsec[0],nsec[1],... + ... from a ciphertext c[0],c[1],...,c[clen-1] + ... and associated data ad[0],ad[1],...,ad[adlen-1] + ... and nonce number npub[0],npub[1],... + ... and secret key k[0],k[1],... + ... + ... return 0; + */ + unsigned long long mlen_; + unsigned char tag_is_match; + + (void)nsec; + if (clen < CRYPTO_ABYTES) { + return -1; + } + mlen_ = clen - CRYPTO_ABYTES; + + tag_is_match = crypto_aead_decrypt_asm(m, c, mlen_, ad, adlen, npub, k); + + if (tag_is_match != 0) + { + memset(m, 0, (size_t)mlen_); + return -1; + } + + *mlen = mlen_; + return 0; +} \ No newline at end of file diff --git a/knot/Implementations/crypto_aead/knot128v2/avr8_lowrom/encrypt_core.S b/knot/Implementations/crypto_aead/knot128v2/avr8_lowrom/encrypt_core.S new file mode 100644 index 0000000..cb7aed5 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/avr8_lowrom/encrypt_core.S @@ -0,0 +1,537 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.0 2020 by KNOT Team * +; ********************************************** +; + 
+; +; ============================================ +; S R A M D E F I N I T I O N S +; ============================================ +; +#include +#include "config.h" + +.section .noinit + SRAM_STATE: .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#if (STATE_INBYTES > 32) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif +#if (STATE_INBYTES > 48) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif + SRAM_MESSAGE_OUT_ADDR: .BYTE 0, 0 + SRAM_MESSAGE_IN_ADDR: .BYTE 0, 0 + SRAM_MESSAGE_IN_LEN: .BYTE 0, 0 +#ifdef CRYPTO_AEAD +; For CRYPTO_AEAD + SRAM_ASSOCIATED_DATA_ADDR: .BYTE 0, 0 + SRAM_ADLEN: .BYTE 0, 0 + SRAM_NONCE_ADDR: .BYTE 0, 0 + SRAM_KEY_ADDR: .BYTE 0, 0 + + SRAM_ADDITIONAL: + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#if (CRYPTO_ABYTES > 16) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif +#if (CRYPTO_ABYTES > 24) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif + +#endif + +.section .text + +#include "permutation.h" + +; require YH:YL be the address of the current associated data/cipher/message block +; for enc and dec, store ciphertext or plaintext +; require ZH:ZL be the address of the current cipher/message block +XOR_to_State: + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + mov cnt0, rate +XOR_to_State_loop: + ld tmp0, Y+ ; plaintext/ciphertext + ld tmp1, X ; state + eor tmp1, tmp0 ; ciphertext/plaintext + sbrc AEDH, 0 ; test auth or enc/dec, if AEDH[0] == 0, skip store result + st Z+, tmp1 ; store ciphertext/plaintext + sbrc AEDH, 1 ; test auth/enc or dec, if AEDH[1] == 0, skip replacing state byte + mov tmp1, tmp0 ; if dec, replace state + st X+, tmp1 ; store state byte + dec cnt0 + brne XOR_to_State_loop +; YH:YL are now the address of the next associated data block +ret + +; require YH:YL pointed to the input data +; require ZH:ZL pointed to the output data +; require cnt0 contains the number of bytes in source data +; require number
of bytes in source data less than rate, i.e., 0 <= cnt0 < rate +; +; bit 0 of AEDH is used to distinguish (auth AD) or (enc/dec M/C): +; AEDH[0] = 0 for (auth AD), AEDH[0] = 1 for (enc/dec M/C) +; bit 1 of AEDH is used to distinguish (auth AD/enc M) or (dec C): +; AEDH[1] = 0 for (auth AD/enc M), AEDH[1] = 1 for (dec C) +; AEDH = 0b000 for (auth AD) +; AEDH = 0b001 for (enc M) +; AEDH = 0b011 for (dec C) +Pad_XOR_to_State: + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + tst cnt0 + breq XOR_padded_data +XOR_source_data_loop: + ld tmp0, Y+ ; plaintext/ciphertext + ld tmp1, X ; state + eor tmp1, tmp0 ; ciphertext/plaintext + sbrc AEDH, 0 ; test auth or enc/dec, if AEDH[0] == 0, skip store result + st Z+, tmp1 ; store ciphertext/plaintext + sbrc AEDH, 1 ; test auth/enc or dec, if AEDH[1] == 0, skip replacing state byte + mov tmp1, tmp0 ; if dec, replace state + st X+, tmp1 ; store state byte + dec cnt0 + brne XOR_source_data_loop +XOR_padded_data: + ldi tmp0, PAD_BITS + ld tmp1, X + eor tmp1, tmp0 + st X, tmp1 +ret + +AddDomain: + ldi XH, hi8(SRAM_STATE + STATE_INBYTES - 1) + ldi XL, lo8(SRAM_STATE + STATE_INBYTES - 1) + ldi tmp0, DOMAIN_BITS + ld tmp1, X + eor tmp0, tmp1 + st X, tmp0 +ret + +; require ZH:ZL be the address of the destination +EXTRACT_from_State: + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + mov tmp1, rate +EXTRACT_from_State_loop: + ld tmp0, X+ + st Z+, tmp0 + dec tmp1 + brne EXTRACT_from_State_loop +ret + +AUTH: + tst radlen + breq AUTH_end + + cp radlen, rate + brlo auth_ad_padded_block + +auth_ad_loop: + rcall XOR_to_State + rcall Permutation + sub radlen, rate + cp radlen, rate + brlo auth_ad_padded_block + rjmp auth_ad_loop + +auth_ad_padded_block: + mov cnt0, radlen + rcall Pad_XOR_to_State + rcall Permutation + +AUTH_end: +ret + +#ifdef CRYPTO_AEAD +Initialization: + ldi rn, NR_0 + ldi XL, lo8(SRAM_STATE) + ldi XH, hi8(SRAM_STATE) + + lds YH, SRAM_NONCE_ADDR + lds YL, SRAM_NONCE_ADDR + 1 + ldi cnt0, CRYPTO_NPUBBYTES
+load_nonce_loop: + ld tmp0, Y+ + st X+, tmp0 + dec cnt0 + brne load_nonce_loop + + lds YH, SRAM_KEY_ADDR + lds YL, SRAM_KEY_ADDR + 1 + ldi cnt0, CRYPTO_KEYBYTES +load_key_loop: + ld tmp0, Y+ + st X+, tmp0 + dec cnt0 + brne load_key_loop + +#if (STATE_INBITS==384) && (RATE_INBITS==192) + ldi cnt0, (STATE_INBYTES - CRYPTO_NPUBBYTES - CRYPTO_KEYBYTES - 1) + clr tmp0 +empty_state_loop: + st X+, tmp0 + dec cnt0 + brne empty_state_loop + ldi tmp0, S384_R192_BITS + st X+, tmp0 +#endif + + rcall Permutation +ret + +ENC: + tst mclen + breq ENC_end + + cp mclen, rate + brlo enc_padded_block + +enc_loop: + rcall XOR_to_State + ldi rn, NR_i + rcall Permutation + sub mclen, rate + cp mclen, rate + brlo enc_padded_block + rjmp enc_loop + +enc_padded_block: + mov cnt0, mclen + rcall Pad_XOR_to_State +ENC_end: +ret + +Finalization: + ldi rate, SQUEEZE_RATE_INBYTES + ldi rn, NR_f + rcall Permutation + rcall EXTRACT_from_State +ret + +; void crypto_aead_encrypt_asm( +; unsigned char *c, +; const unsigned char *m, +; unsigned long long mlen, +; const unsigned char *ad, +; unsigned long long adlen, +; const unsigned char *npub, +; const unsigned char *k +; ) +; +; unsigned char *c, is passed in r24:r25 +; const unsigned char *m, is passed in r22:r23 +; unsigned long long mlen, is passed in r20:r21, only LSB (r20) is used +; const unsigned char *ad, is passed in r18:r19 +; unsigned long long adlen, is passed in r16:r17, only LSB (r16) is used +; const unsigned char *npub, is passed in r14:r15 +; const unsigned char *k is passed in r12:r13 +.global crypto_aead_encrypt_asm +crypto_aead_encrypt_asm: + PUSH_ALL + ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR) + ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR) + st X+, r25 ;store cipher address in SRAM_MESSAGE_OUT_ADDR + st X+, r24 + st X+, r23 ;store message address in SRAM_MESSAGE_IN_ADDR + st X+, r22 + st X+, r21 ;store message length in SRAM_MESSAGE_IN_LEN + st X+, r20 + st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR + st X+, r18 + st 
X+, r17 ;store associated data length in SRAM_ADLEN + st X+, r16 + st X+, r15 ;store nonce address in SRAM_NONCE_ADDR + st X+, r14 + st X+, r13 ;store key address in SRAM_KEY_ADDR + st X+, r12 + mov radlen, r16 + mov mclen, r20 + + rcall Initialization + + ldi rn, NR_i + ldi rate, RATE_INBYTES + ldi AEDH, 0b000 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C) + lds YH, SRAM_ASSOCIATED_DATA_ADDR + lds YL, SRAM_ASSOCIATED_DATA_ADDR + 1 + rcall AUTH + rcall AddDomain + ldi AEDH, 0b001 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C) + lds YH, SRAM_MESSAGE_IN_ADDR + lds YL, SRAM_MESSAGE_IN_ADDR + 1 + lds ZH, SRAM_MESSAGE_OUT_ADDR + lds ZL, SRAM_MESSAGE_OUT_ADDR + 1 + rcall ENC + rcall Finalization + POP_ALL +ret + +; int crypto_aead_decrypt_asm( +; unsigned char *m, +; const unsigned char *c, +; unsigned long long clen, +; const unsigned char *ad, +; unsigned long long adlen, +; const unsigned char *npub, +; const unsigned char *k +; ) +; +; unsigned char *m, is passed in r24:r25 +; const unsigned char *c, is passed in r22:r23 +; unsigned long long clen, is passed in r20:r21, only LSB (r20) is used +; const unsigned char *ad, is passed in r18:r19 +; unsigned long long adlen, is passed in r16:r17, only LSB (r16) is used +; const unsigned char *npub, is passed in r14:r15 +; const unsigned char *k is passed in r12:r13 +.global crypto_aead_decrypt_asm +crypto_aead_decrypt_asm: + PUSH_ALL + ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR) + ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR) + st X+, r25 ;store message address in SRAM_MESSAGE_OUT_ADDR + st X+, r24 + st X+, r23 ;store cipher address in SRAM_MESSAGE_IN_ADDR + st X+, r22 + st X+, r21 ;store cipher length in SRAM_MESSAGE_IN_LEN + st X+, r20 + st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR + st X+, r18 + st X+, r17 ;store associated data length in SRAM_ADLEN + st X+, r16 + st X+, r15 ;store nonce address in SRAM_NONCE_ADDR + st X+, r14 + st X+, r13 ;store 
key address in SRAM_KEY_ADDR + st X+, r12 + mov radlen, r16 + mov mclen, r20 + + rcall Initialization + + ldi rn, NR_i + ldi rate, RATE_INBYTES + ldi AEDH, 0b000 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C) + lds YH, SRAM_ASSOCIATED_DATA_ADDR + lds YL, SRAM_ASSOCIATED_DATA_ADDR + 1 + rcall AUTH + rcall AddDomain + ldi AEDH, 0b011 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C) + lds YH, SRAM_MESSAGE_IN_ADDR + lds YL, SRAM_MESSAGE_IN_ADDR + 1 + lds ZH, SRAM_MESSAGE_OUT_ADDR + lds ZL, SRAM_MESSAGE_OUT_ADDR + 1 + rcall ENC + + ldi ZH, hi8(SRAM_ADDITIONAL) + ldi ZL, lo8(SRAM_ADDITIONAL) + rcall Finalization + + sbiw ZL, CRYPTO_ABYTES + ldi cnt0, CRYPTO_ABYTES +; compare the computed tag (Z) with the received tag (Y) in constant time, +; so the run time does not reveal the position of the first mismatch + clr rn ; rn is no longer needed after Finalization; accumulates byte differences +compare_tag: + ld tmp0, Z+ + ld tmp1, Y+ + eor tmp0, tmp1 ; zero iff the two tag bytes are equal + or rn, tmp0 + dec cnt0 + brne compare_tag + +; rn == 0 iff all tag bytes matched; build the return value without branching + neg rn ; carry is set iff rn != 0 + sbc r24, r24 ; r24 = 0x00 on match, 0xFF on mismatch + mov r25, r24 ; r25:r24 = 0 on match, -1 on mismatch + +; restore the saved registers; r25:r24 carries the result to the caller +crypto_aead_decrypt_end: + POP_ALL +ret + +; #ifdef CRYPTO_AEAD +#endif + + +#ifdef CRYPTO_HASH + +; void crypto_hash_asm( +; unsigned char *out, +; const unsigned char *in, +; unsigned long long inlen +; ) +; +; unsigned char *out, is passed in r24:r25 +; const unsigned char *in, is passed in r22:r23 +; unsigned long long inlen, is passed in r20:r21, only LSB (r20) is used +.global crypto_hash_asm +crypto_hash_asm: + PUSH_ALL + ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR) + ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR) + st X+, r25 ;store message address in SRAM_MESSAGE_OUT_ADDR + st X+, r24 + st X+, r23 ;store cipher address in SRAM_MESSAGE_IN_ADDR + st X+, r22 + st X+, r21 ;store cipher length in SRAM_MESSAGE_IN_LEN + st X+, r20 + mov mclen, r20 + + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) +#if (STATE_INBITS==384) && (HASH_RATE_INBITS==128) + ldi cnt0, STATE_INBYTES - 1 +#else + ldi cnt0, STATE_INBYTES +#endif + clr tmp0 +zero_state: + st X+, tmp0 + dec cnt0 + brne zero_state + +#if
(STATE_INBITS==384) && (HASH_RATE_INBITS==128) + ldi tmp0, S384_R192_BITS + st X+, tmp0 +#endif + + ldi rn, NR_h + ldi AEDH, 0b100 + +HASH_ABSORBING: + mov radlen, mclen + tst radlen + breq EMPTY_M + ldi rate, HASH_RATE_INBYTES + lds YH, SRAM_MESSAGE_IN_ADDR + lds YL, SRAM_MESSAGE_IN_ADDR + 1 + rcall AUTH + rjmp HASH_SQUEEZING + +EMPTY_M: + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + ldi tmp0, PAD_BITS + ld tmp1, X + eor tmp1, tmp0 + st X, tmp1 + rcall Permutation + +HASH_SQUEEZING: + ldi rate, HASH_SQUEEZE_RATE_INBYTES + lds ZH, SRAM_MESSAGE_OUT_ADDR + lds ZL, SRAM_MESSAGE_OUT_ADDR + 1 + ldi tcnt, CRYPTO_BYTES +SQUEEZING_loop: + rcall EXTRACT_from_State + subi tcnt, HASH_SQUEEZE_RATE_INBYTES + breq HASH_SQUEEZING_end + rcall Permutation + rjmp SQUEEZING_loop +HASH_SQUEEZING_end: + POP_ALL +ret + +#endif + + +; Byte Order In AVR 8: +; KNOT-AEAD(128, 256, 64): +; N[ 0] AEAD_State[ 0] | Message[ 0] Perm_row_0[0] 0 Tag[ 0] +; N[ 1] AEAD_State[ 1] | Message[ 1] Perm_row_0[1] 0 Tag[ 1] +; N[ 2] AEAD_State[ 2] | Message[ 2] Perm_row_0[2] 0 Tag[ 2] +; N[ 3] AEAD_State[ 3] | Message[ 3] Perm_row_0[3] 0 Tag[ 3] +; N[ 4] AEAD_State[ 4] | Message[ 4] 0x01 Perm_row_0[4] 0 Tag[ 4] +; N[ 5] AEAD_State[ 5] | Message[ 5] 0x00 Perm_row_0[5] 0 Tag[ 5] +; N[ 6] AEAD_State[ 6] | Message[ 6] 0x00 Perm_row_0[6] 0 Tag[ 6] +; N[ 7] AEAD_State[ 7] | Message[ 7] 0x00 Perm_row_0[7] <<< 0 Tag[ 7] +; N[ 8] AEAD_State[ 8] | Perm_row_1[0] 1 +; N[ 9] AEAD_State[ 9] | Perm_row_1[1] 1 +; N[10] AEAD_State[10] | Perm_row_1[2] 1 +; N[11] AEAD_State[11] | Perm_row_1[3] 1 +; N[12] AEAD_State[12] | Perm_row_1[4] 1 +; N[13] AEAD_State[13] | Perm_row_1[5] 1 +; N[14] AEAD_State[14] | Perm_row_1[6] 1 +; N[15] AEAD_State[15] | Perm_row_1[7] <<< 1 +; K[ 0] AEAD_State[16] | Perm_row_2[0] 8 +; K[ 1] AEAD_State[17] | Perm_row_2[1] 8 +; K[ 2] AEAD_State[18] | Perm_row_2[2] 8 +; K[ 3] AEAD_State[19] | Perm_row_2[3] 8 +; K[ 4] AEAD_State[20] | Perm_row_2[4] 8 +; K[ 5] AEAD_State[21] | Perm_row_2[5] 8 +; K[ 
6] AEAD_State[22] | Perm_row_2[6] 8 +; K[ 7] AEAD_State[23] | Perm_row_2[7] <<< 8 +; K[ 8] AEAD_State[24] | Perm_row_3[0] 25 +; K[ 9] AEAD_State[25] | Perm_row_3[1] 25 +; K[10] AEAD_State[26] | Perm_row_3[2] 25 +; K[11] AEAD_State[27] | Perm_row_3[3] 25 +; K[12] AEAD_State[28] | Perm_row_3[4] 25 +; K[13] AEAD_State[29] | Perm_row_3[5] 25 +; K[14] AEAD_State[30] | Perm_row_3[6] 25 +; K[15] AEAD_State[31] | ^0x80 Perm_row_3[7] <<< 25 +; +; +; KNOT-AEAD(128, 384, 192): +; Initalization +; N[ 0] AEAD_State[ 0] | Message[ 0] Perm_row_0[ 0] 0 Tag[ 0] +; N[ 1] AEAD_State[ 1] | Message[ 1] Perm_row_0[ 1] 0 Tag[ 1] +; N[ 2] AEAD_State[ 2] | Message[ 2] Perm_row_0[ 2] 0 Tag[ 2] +; N[ 3] AEAD_State[ 3] | Message[ 3] Perm_row_0[ 3] 0 Tag[ 3] +; N[ 4] AEAD_State[ 4] | Message[ 4] 0x01 Perm_row_0[ 4] 0 Tag[ 4] +; N[ 5] AEAD_State[ 5] | Message[ 5] 0x00 Perm_row_0[ 5] 0 Tag[ 5] +; N[ 6] AEAD_State[ 6] | Message[ 6] 0x00 Perm_row_0[ 6] 0 Tag[ 6] +; N[ 7] AEAD_State[ 7] | Message[ 7] 0x00 Perm_row_0[ 7] 0 Tag[ 7] +; N[ 8] AEAD_State[ 8] | Message[ 8] 0x00 Perm_row_0[ 8] 0 Tag[ 8] +; N[ 9] AEAD_State[ 9] | Message[ 9] 0x00 Perm_row_0[ 9] 0 Tag[ 9] +; N[10] AEAD_State[10] | Message[10] 0x00 Perm_row_0[10] 0 Tag[10] +; N[11] AEAD_State[11] | Message[11] 0x00 Perm_row_0[11] <<< 0 Tag[11] +; N[12] AEAD_State[12] | Message[12] 0x00 Perm_row_1[ 0] 1 Tag[12] +; N[13] AEAD_State[13] | Message[13] 0x00 Perm_row_1[ 1] 1 Tag[13] +; N[14] AEAD_State[14] | Message[14] 0x00 Perm_row_1[ 2] 1 Tag[14] +; N[15] AEAD_State[15] | Message[15] 0x00 Perm_row_1[ 3] 1 Tag[15] +; K[ 0] AEAD_State[16] | Message[16] 0x00 Perm_row_1[ 4] 1 +; K[ 1] AEAD_State[17] | Message[17] 0x00 Perm_row_1[ 5] 1 +; K[ 2] AEAD_State[18] | Message[18] 0x00 Perm_row_1[ 6] 1 +; K[ 3] AEAD_State[19] | Message[19] 0x00 Perm_row_1[ 7] 1 +; K[ 4] AEAD_State[20] | Message[20] 0x00 Perm_row_1[ 8] 1 +; K[ 5] AEAD_State[21] | Message[21] 0x00 Perm_row_1[ 9] 1 +; K[ 6] AEAD_State[22] | Message[22] 0x00 Perm_row_1[10] 1 +; K[ 7] 
AEAD_State[23] | Message[23] 0x00 Perm_row_1[11] <<< 1 +; K[ 8] AEAD_State[24] | Perm_row_2[ 0] 8 +; K[ 9] AEAD_State[25] | Perm_row_2[ 1] 8 +; K[10] AEAD_State[26] | Perm_row_2[ 2] 8 +; K[11] AEAD_State[27] | Perm_row_2[ 3] 8 +; K[12] AEAD_State[28] | Perm_row_2[ 4] 8 +; K[13] AEAD_State[29] | Perm_row_2[ 5] 8 +; K[14] AEAD_State[30] | Perm_row_2[ 6] 8 +; K[15] AEAD_State[31] | Perm_row_2[ 7] 8 +; 0x00 AEAD_State[32] | Perm_row_2[ 8] 8 +; 0x00 AEAD_State[33] | Perm_row_2[ 9] 8 +; 0x00 AEAD_State[34] | Perm_row_2[10] 8 +; 0x00 AEAD_State[35] | Perm_row_2[11] <<< 8 +; 0x00 AEAD_State[36] | Perm_row_3[ 0] 55 +; 0x00 AEAD_State[37] | Perm_row_3[ 1] 55 +; 0x00 AEAD_State[38] | Perm_row_3[ 2] 55 +; 0x00 AEAD_State[39] | Perm_row_3[ 3] 55 +; 0x00 AEAD_State[40] | Perm_row_3[ 4] 55 +; 0x00 AEAD_State[41] | Perm_row_3[ 5] 55 +; 0x00 AEAD_State[42] | Perm_row_3[ 6] 55 +; 0x00 AEAD_State[43] | Perm_row_3[ 7] 55 +; 0x00 AEAD_State[44] | Perm_row_3[ 8] 55 +; 0x00 AEAD_State[45] | Perm_row_3[ 9] 55 +; 0x00 AEAD_State[46] | Perm_row_3[10] 55 +; 0x00 ^0x80 AEAD_State[47] | ^0x80 Perm_row_3[11] <<< 55 diff --git a/knot/Implementations/crypto_aead/knot128v2/avr8_lowrom/knot256.h b/knot/Implementations/crypto_aead/knot128v2/avr8_lowrom/knot256.h new file mode 100644 index 0000000..d16bf8c --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/avr8_lowrom/knot256.h @@ -0,0 +1,197 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.0 2020 by KNOT Team * +; ********************************************** +; +#include "assist.h" + +Permutation: + PUSH_CONFLICT + mov rcnt, rn + + ldi rc, 0x01 + ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES) + ld x30, Y+ + ld x31, Y+ + ld x32, Y+ + ld x33, Y+ + ld x34, Y+ + ld x35, Y+ + ld x36, Y+ + ld x37, Y+ + 
+round_loop_start: + rjmp AddRC_SubColumns_Start + +load_columns_table: + rjmp load_column0 + rjmp load_column1 + rjmp load_column2 + rjmp load_column3 + rjmp load_column4 + rjmp load_column5 + rjmp load_column6 + rjmp load_column7 + rjmp amend_shiftRow + +load_column0: + mov x3j, x30 + rjmp Sbox_one_column +load_column1: + mov x30, x3j + mov x3j, x31 + rjmp Sbox_one_column +load_column2: + mov x31, x3j + mov x3j, x32 + rjmp Sbox_one_column +load_column3: + mov x32, x3j + mov x3j, x33 + rjmp Sbox_one_column +load_column4: + mov x33, x3j + mov x3j, x34 + rjmp Sbox_one_column +load_column5: + mov x34, x3j + mov x3j, x35 + rjmp Sbox_one_column +load_column6: + mov x35, x3j + mov x3j, x36 + rjmp Sbox_one_column +load_column7: + mov x36, x3j + mov x3j, x37 + rjmp Sbox_one_column + +#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) +LFSR_table: + rjmp LFSR6 + rjmp LFSR7 +LFSR6: + LFSR6_MACRO + rjmp LFSR_DONE +LFSR7: + LFSR7_MACRO + rjmp LFSR_DONE +#endif + +;;;;;;;;;;;;;;;;;;;;;;;; Real Start +AddRC_SubColumns_Start: + ldi YH, hi8(SRAM_STATE) + ldi YL, lo8(SRAM_STATE) + clr ccnt + ld x0j, Y + eor x0j, rc + +#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) + ldi ZL, pm_lo8(LFSR_table) + ldi ZH, pm_hi8(LFSR_table) + sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[1] = 1 for HASH + adiw ZL, 1 + ijmp +LFSR_DONE: +#elif defined(CRYPTO_AEAD) + LFSR6_MACRO ; only AEAD +#else + LFSR7_MACRO ; only HASH +#endif + + ldd x1j, Y + ROW_INBYTES + ldd x2j, Y + 2 * ROW_INBYTES + ldi ZL, pm_lo8(load_columns_table) + ldi ZH, pm_hi8(load_columns_table) + ijmp +Sbox_one_column: + Sbox x0j, x1j, x2j, x3j + + ; 7 6 5 4 3 2 1 0 + ; -- -- -- -- -- -- -- x- 0 + ; -- -- -- -- -- -- -- x' 0 + ; -- -- -- -- -- -- x- -- 1 + ; -- -- -- -- x' -- -- -- 3 + ; 4 3 2 1 0 7 6 5 + ; Store a byte to Row 0 + st Y, x0j + ; Store a byte combined with ShiftRow1 + lsl t1j + mov t1j, x1j ; back up the last updated byte in t1j, to be used in shiftRow1 (1 bit left) + rol x1j + std Y + ROW_INBYTES, x1j + ; Store a byte 
combined with ShiftRow2 + inc ccnt + cpi ccnt, ROW_INBYTES + breq ROW2_WRAP + ldd t2j, Y + 2 * ROW_INBYTES + 1 ; load next byte, the last updated byte needed to be shifted to the address of the next bytes + std Y + 2 * ROW_INBYTES + 1, x2j + mov x2j, t2j + jmp NO_ROW2_WRAP +ROW2_WRAP: + std Y + ROW_INBYTES + 1, x2j + ; remain ShiftRow3 to be done at 'amend_shiftRow' +NO_ROW2_WRAP: + adiw YL, 1 + ld x0j, Y + ldd x1j, Y + ROW_INBYTES + + adiw ZL, 1 + ijmp + +amend_shiftRow: + ldi YH, hi8(SRAM_STATE + ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + ROW_INBYTES) + + ld x1j, Y + bst t1j, 7 + bld x1j, 0 + st Y, x1j + + ; <<< 1 + mov x37, x3j + rol x3j + rol x30 + rol x31 + rol x32 + rol x33 + rol x34 + rol x35 + rol x36 + rol x37 + ; <<< 24 + ; 7 6 5 4 3 2 1 0 => 4 3 2 1 0 7 6 5 + mov x3j, x30 + mov x30, x35 + mov x35, x32 + mov x32, x37 + mov x37, x34 + mov x34, x31 + mov x31, x36 + mov x36, x33 + mov x33, x3j + + dec rcnt + breq round_loop_end + rjmp round_loop_start + +round_loop_end: + ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES) + st Y+, x30 + st Y+, x31 + st Y+, x32 + st Y+, x33 + st Y+, x34 + st Y+, x35 + st Y+, x36 + st Y+, x37 + + POP_CONFLICT +ret \ No newline at end of file diff --git a/knot/Implementations/crypto_aead/knot128v2/avr8_lowrom/knot384.h b/knot/Implementations/crypto_aead/knot128v2/avr8_lowrom/knot384.h new file mode 100644 index 0000000..65c474a --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/avr8_lowrom/knot384.h @@ -0,0 +1,219 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.0 2020 by KNOT Team * +; ********************************************** +; +#include "assist.h" + +Permutation: + PUSH_CONFLICT + mov rcnt, rn + + ldi rc, 0x01 + ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 3 * 
ROW_INBYTES) + ld x30, Y+ + ld x31, Y+ + ld x32, Y+ + ld x33, Y+ + ld x34, Y+ + ld x35, Y+ + ld x36, Y+ + ld x37, Y+ + ld x38, Y+ + ld x39, Y+ + ld x3a, Y+ + ld x3b, Y+ + +round_loop_start: + rjmp AddRC_SubColumns_Start + +load_columns_table: + rjmp load_column0 + rjmp load_column1 + rjmp load_column2 + rjmp load_column3 + rjmp load_column4 + rjmp load_column5 + rjmp load_column6 + rjmp load_column7 + rjmp load_column8 + rjmp load_column9 + rjmp load_columna + rjmp load_columnb + rjmp amend_shiftRow + +load_column0: + mov x3j, x30 + rjmp Sbox_one_column +load_column1: + mov x30, x3j + mov x3j, x31 + rjmp Sbox_one_column +load_column2: + mov x31, x3j + mov x3j, x32 + rjmp Sbox_one_column +load_column3: + mov x32, x3j + mov x3j, x33 + rjmp Sbox_one_column +load_column4: + mov x33, x3j + mov x3j, x34 + rjmp Sbox_one_column +load_column5: + mov x34, x3j + mov x3j, x35 + rjmp Sbox_one_column +load_column6: + mov x35, x3j + mov x3j, x36 + rjmp Sbox_one_column +load_column7: + mov x36, x3j + mov x3j, x37 + rjmp Sbox_one_column +load_column8: + mov x37, x3j + mov x3j, x38 + rjmp Sbox_one_column +load_column9: + mov x38, x3j + mov x3j, x39 + rjmp Sbox_one_column +load_columna: + mov x39, x3j + mov x3j, x3a + rjmp Sbox_one_column +load_columnb: + mov x3a, x3j + mov x3j, x3b + rjmp Sbox_one_column + +;;;;;;;;;;;;;;;;;;;;;;;; Real Start +AddRC_SubColumns_Start: + ldi YH, hi8(SRAM_STATE) + ldi YL, lo8(SRAM_STATE) + ldi ZL, pm_lo8(load_columns_table) + ldi ZH, pm_hi8(load_columns_table) + clr ccnt + ld x0j, Y + eor x0j, rc + LFSR7_MACRO + + ldd x1j, Y + ROW_INBYTES + ldd x2j, Y + 2 * ROW_INBYTES + ijmp +Sbox_one_column: + Sbox x0j, x1j, x2j, x3j + + ; b a 9 8 7 6 5 4 3 2 1 0 + ; -- -- -- -- -- -- -- -- -- -- -- x- 0 + ; -- -- -- -- -- -- -- -- -- -- -- x' 0 + ; -- -- -- -- -- -- -- -- -- -- x- -- 1 + ; -- -- -- -- x' -- -- -- -- -- -- -- 7 + ; 4 3 2 1 0 b a 9 8 7 6 5 + ; Store a byte to Row 0 + st Y, x0j + ; Store a byte combined with ShiftRow 1 + lsl t1j + mov t1j, x1j ; back 
up the last updated byte in t1j, to be used in shiftRow1 (1 bit left) + rol x1j + std Y + ROW_INBYTES, x1j + ; Store a byte combined with ShiftRow 2 + inc ccnt + cpi ccnt, ROW_INBYTES + breq ROW2_WRAP + ldd t2j, Y + 2 * ROW_INBYTES + 1 ; load next byte, the last updated byte needed to be shifted to the address of the next bytes + std Y + 2 * ROW_INBYTES + 1, x2j + mov x2j, t2j + jmp NO_ROW2_WRAP +ROW2_WRAP: + std Y + ROW_INBYTES + 1, x2j + ; remain ShiftRow3 to be done at 'amend_shiftRow' +NO_ROW2_WRAP: + adiw YL, 1 + ld x0j, Y + ldd x1j, Y + ROW_INBYTES + + adiw ZL, 1 + ijmp + +amend_shiftRow: + ldi YH, hi8(SRAM_STATE + ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + ROW_INBYTES) + + ld x1j, Y + bst t1j, 7 + bld x1j, 0 + st Y, x1j + + ; >>> 1 + mov x3b, x3j + ror x3j + ror x3a + ror x39 + ror x38 + ror x37 + ror x36 + ror x35 + ror x34 + ror x33 + ror x32 + ror x31 + ror x30 + ror x3b + ; <<< 56 + ; b a 9 8 7 6 5 4 3 2 1 0 => 4 3 2 1 0 b a 9 8 7 6 5 + ;mov x3j, x30 + ;mov x30, x35 + ;mov x35, x32 + ;mov x32, x37 + ;mov x37, x34 + ;mov x34, x31 + ;mov x31, x36 + ;mov x36, x33 + ;mov x33, x3j + mov x3j, x30 + mov x30, x35 + mov x35, x3a + mov x3a, x33 + mov x33, x38 + mov x38, x31 + mov x31, x36 + mov x36, x3b + mov x3b, x34 + mov x34, x39 + mov x39, x32 + mov x32, x37 + mov x37, x3j + + dec rcnt + breq round_loop_end + rjmp round_loop_start + +round_loop_end: + + ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES) + st Y+, x30 + st Y+, x31 + st Y+, x32 + st Y+, x33 + st Y+, x34 + st Y+, x35 + st Y+, x36 + st Y+, x37 + st Y+, x38 + st Y+, x39 + st Y+, x3a + st Y+, x3b + + POP_CONFLICT +ret \ No newline at end of file diff --git a/knot/Implementations/crypto_aead/knot128v2/avr8_lowrom/knot512.h b/knot/Implementations/crypto_aead/knot128v2/avr8_lowrom/knot512.h new file mode 100644 index 0000000..d24b353 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/avr8_lowrom/knot512.h @@ -0,0 +1,275 @@ +; +; 
********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.0 2020 by KNOT Team * +; ********************************************** +; +#include "assist.h" + +Permutation: + PUSH_CONFLICT + mov rcnt, rn + + ldi rc, 0x01 + ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES) + ld x30, Y+ + ld x31, Y+ + ld x32, Y+ + ld x33, Y+ + ld x34, Y+ + ld x35, Y+ + ld x36, Y+ + ld x37, Y+ + ld x38, Y+ + ld x39, Y+ + ld x3a, Y+ + ld x3b, Y+ + ld x3c, Y+ + ld x3d, Y+ + ld x3e, Y+ + ld x3f, Y+ + +round_loop_start: + rjmp AddRC_SubColumns_Start + +load_columns_table: + rjmp load_column0 + rjmp load_column1 + rjmp load_column2 + rjmp load_column3 + rjmp load_column4 + rjmp load_column5 + rjmp load_column6 + rjmp load_column7 + rjmp load_column8 + rjmp load_column9 + rjmp load_columna + rjmp load_columnb + rjmp load_columnc + rjmp load_columnd + rjmp load_columne + rjmp load_columnf + rjmp amend_shiftRow + +load_column0: + mov x3j, x30 + rjmp Sbox_one_column +load_column1: + mov x30, x3j + mov x3j, x31 + rjmp Sbox_one_column +load_column2: + mov x31, x3j + mov x3j, x32 + rjmp Sbox_one_column +load_column3: + mov x32, x3j + mov x3j, x33 + rjmp Sbox_one_column +load_column4: + mov x33, x3j + mov x3j, x34 + rjmp Sbox_one_column +load_column5: + mov x34, x3j + mov x3j, x35 + rjmp Sbox_one_column +load_column6: + mov x35, x3j + mov x3j, x36 + rjmp Sbox_one_column +load_column7: + mov x36, x3j + mov x3j, x37 + rjmp Sbox_one_column +load_column8: + mov x37, x3j + mov x3j, x38 + rjmp Sbox_one_column +load_column9: + mov x38, x3j + mov x3j, x39 + rjmp Sbox_one_column +load_columna: + mov x39, x3j + mov x3j, x3a + rjmp Sbox_one_column +load_columnb: + mov x3a, x3j + mov x3j, x3b + rjmp Sbox_one_column +load_columnc: + mov x3b, x3j + mov x3j, x3c + rjmp Sbox_one_column +load_columnd: + mov 
x3c, x3j + mov x3j, x3d + rjmp Sbox_one_column +load_columne: + mov x3d, x3j + mov x3j, x3e + rjmp Sbox_one_column +load_columnf: + mov x3e, x3j + mov x3j, x3f + rjmp Sbox_one_column + +#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) +LFSR_table: + rjmp LFSR7 + rjmp LFSR8 +LFSR7: + LFSR7_MACRO + rjmp LFSR_DONE +LFSR8: + LFSR8_MACRO + rjmp LFSR_DONE +#endif + +;;;;;;;;;;;;;;;;;;;;;;;; Real Start +AddRC_SubColumns_Start: + ldi YH, hi8(SRAM_STATE) + ldi YL, lo8(SRAM_STATE) + clr ccnt + ld x0j, Y + eor x0j, rc + +#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) + ldi ZL, pm_lo8(LFSR_table) + ldi ZH, pm_hi8(LFSR_table) + sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[1] = 1 for HASH + adiw ZL, 1 + ijmp +LFSR_DONE: +#elif defined(CRYPTO_AEAD) + LFSR7_MACRO ; only AEAD +#else + LFSR8_MACRO ; only HASH +#endif + + ldd x1j, Y + ROW_INBYTES + ldd x2j, Y + 2 * ROW_INBYTES + ldd t2j, Y + 2 * ROW_INBYTES + 1 + ldi ZL, pm_lo8(load_columns_table) + ldi ZH, pm_hi8(load_columns_table) + ijmp +Sbox_one_column: + Sbox x0j, x1j, x2j, x3j + + ; f e d c b a 9 8 7 6 5 4 3 2 1 0 + ; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x- 0 + ; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x' 0 + ; -- -- -- -- -- -- -- -- -- -- -- -- -- x- -- -- 2 + ; -- -- -- -- -- -- -- -- -- -- -- -- x' -- -- -- 3 + ; c b a 9 8 7 6 5 4 3 2 1 0 f e d + ; Store a byte to Row 0 + st Y, x0j + ; Store a byte combined with ShiftRow1 + lsl t1j + mov t1j, x1j ; back up the last updated byte in t1j, to be used in shiftRow1 (1 bit left) + rol x1j + std Y + ROW_INBYTES, x1j + ; Store a byte combined with ShiftRow2 + inc ccnt + cpi ccnt, ROW_INBYTES - 1 + brsh ROW2_WRAP + ldd tmp0, Y + 2 * ROW_INBYTES + 2 ; load next byte, the last updated byte needed to be shifted to the address of the next bytes + std Y + 2 * ROW_INBYTES + 2, x2j + mov x2j, t2j + mov t2j, tmp0 + jmp NO_ROW2_WRAP +ROW2_WRAP: + std Y + ROW_INBYTES + 2, x2j + mov x2j, t2j + + ; remain ShiftRow3 to be done at 'amend_shiftRow' +NO_ROW2_WRAP: + adiw YL, 1 + ld 
x0j, Y + ldd x1j, Y + ROW_INBYTES + + adiw ZL, 1 + ijmp + +amend_shiftRow: + ldi YH, hi8(SRAM_STATE + ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + ROW_INBYTES) + + ld x1j, Y + bst t1j, 7 + bld x1j, 0 + st Y, x1j + + ; <<< 1 + mov x3f, x3j + rol x3j + rol x30 + rol x31 + rol x32 + rol x33 + rol x34 + rol x35 + rol x36 + rol x37 + rol x38 + rol x39 + rol x3a + rol x3b + rol x3c + rol x3d + rol x3e + rol x3f + ; <<< 24 + ; f e d c b a 9 8 7 6 5 4 3 2 1 0 => + ; c b a 9 8 7 6 5 4 3 2 1 0 f e d + mov x3j, x30 + mov x30, x3d + mov x3d, x3a + mov x3a, x37 + mov x37, x34 + mov x34, x31 + mov x31, x3e + mov x3e, x3b + mov x3b, x38 + mov x38, x35 + mov x35, x32 + mov x32, x3f + mov x3f, x3c + mov x3c, x39 + mov x39, x36 + mov x36, x33 + mov x33, x3j + + dec rcnt + breq round_loop_end + rjmp round_loop_start + +round_loop_end: + + ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES) + st Y+, x30 + st Y+, x31 + st Y+, x32 + st Y+, x33 + st Y+, x34 + st Y+, x35 + st Y+, x36 + st Y+, x37 + st Y+, x38 + st Y+, x39 + st Y+, x3a + st Y+, x3b + st Y+, x3c + st Y+, x3d + st Y+, x3e + st Y+, x3f + + POP_CONFLICT +ret \ No newline at end of file diff --git a/knot/Implementations/crypto_aead/knot128v2/avr8_lowrom/permutation.h b/knot/Implementations/crypto_aead/knot128v2/avr8_lowrom/permutation.h new file mode 100644 index 0000000..a57c5d3 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/avr8_lowrom/permutation.h @@ -0,0 +1,109 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.0 2020 by KNOT Team * +; ********************************************** +; + +; +; ============================================ +; R E G I S T E R D E F I N I T I O N S +; ============================================ +; + +#define mclen r16 +#define radlen r17 +#define tcnt r17 
+#define tmp0 r20 +#define tmp1 r21 +#define cnt0 r22 +#define rn r23 +#define rate r24 + +; +; ; AEDH = 0b000: for authenticate AD +; ; AEDH = 0b001: for encryption +; ; AEDH = 0b011: for decryption +; ; AEDH = 0b100: for hash +; #define AEDH r25 ; Register used globally within this program +; +; #define x30 r0 ; Register used without overlapping +; #define x31 r1 ; Register used without overlapping +; #define x32 r2 ; Register used without overlapping +; #define x33 r3 ; Register used without overlapping +; #define x34 r4 ; Register used without overlapping +; #define x35 r5 ; Register used without overlapping +; #define x36 r6 ; Register used without overlapping +; #define x37 r7 ; Register used without overlapping +; #define x38 r8 ; Register used without overlapping +; #define x39 r9 ; Register used without overlapping +; #define x3a r10 ; Register used without overlapping +; #define x3b r11 ; Register used without overlapping +; #define x3c r12 ; Register used without overlapping +; #define x3d r13 ; Register used without overlapping +; #define x3e r14 ; Register used without overlapping +; #define x3f r15 ; Register used without overlapping +; +; #define x0j r16 ; Register used overlapped, should be backed up before using +; #define x1j r17 ; Register used overlapped, should be backed up before using +; #define x2j r18 ; Register used overlapped, should be backed up before using +; #define x3j r19 ; Register used overlapped, should be backed up before using +; +; ; t2j used in knot512 to keep one byte in Row2 (because of rotating 16-bit), +; ; will not be interupt with LFSR which uses the overlapped register tmp1 +; #define t2j r21 ; Temporary register, used freely +; #define t1j r22 ; Temporary register, used freely +; #define t3j r23 ; Temporary register, used freely +; +; #define rc r24 ; Register used overlapped, should be backed up before using +; #define rcnt r26 ; Register used overlapped, should be backed up before using +; #define ccnt r27 ; 
Register used overlapped, should be backed up before using + +#define AEDH r25 +#define x30 r0 +#define x31 r1 +#define x32 r2 +#define x33 r3 +#define x34 r4 +#define x35 r5 +#define x36 r6 +#define x37 r7 +#define x38 r8 +#define x39 r9 +#define x3a r10 +#define x3b r11 +#define x3c r12 +#define x3d r13 +#define x3e r14 +#define x3f r15 + +#define x0j r16 +#define x1j r17 +#define x2j r18 +#define x3j r19 + +; t2j used in knot512 to keep one byte in Row2 (because of rotating 16-bit), +; will not be interupt with LFSR which uses the overlapped register tmp1 +#define t2j r21 +#define t1j r22 +#define t3j r23 + +#define rc r24 +#define rcnt r26 +#define ccnt r27 + +#if (STATE_INBITS==256) +#include "knot256.h" +#elif (STATE_INBITS==384) +#include "knot384.h" +#elif (STATE_INBITS==512) +#include "knot512.h" +#else +#error "Not specified key size and state size" +#endif + + diff --git a/knot/Implementations/crypto_aead/knot192/avr8_lowrom/api.h b/knot/Implementations/crypto_aead/knot192/avr8_lowrom/api.h new file mode 100644 index 0000000..0146d82 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/avr8_lowrom/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 24 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 24 +#define CRYPTO_ABYTES 24 +#define CRYPTO_NOOVERLAP 1 diff --git a/knot/Implementations/crypto_aead/knot192/avr8_lowrom/assist.h b/knot/Implementations/crypto_aead/knot192/avr8_lowrom/assist.h new file mode 100644 index 0000000..cb903a5 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/avr8_lowrom/assist.h @@ -0,0 +1,140 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.0 2020 by KNOT Team * +; ********************************************** +; +.macro LFSR6_MACRO + bst rc, 5 + bld tmp0, 0 + bst rc, 4 + bld tmp1, 0 + eor tmp0, tmp1 + ror tmp0 + rol 
rc + andi rc, 0x3F +.endm + +.macro LFSR7_MACRO + bst rc, 6 + bld tmp0, 0 + bst rc, 5 + bld tmp1, 0 + eor tmp0, tmp1 + ror tmp0 + rol rc + andi rc, 0x7F +.endm + +.macro LFSR8_MACRO + bst rc, 7 + bld tmp0, 0 + bst rc, 5 + bld tmp1, 0 + eor tmp0, tmp1 + bst rc, 4 + bld tmp1, 0 + eor tmp0, tmp1 + bst rc, 3 + bld tmp1, 0 + eor tmp0, tmp1 + ror tmp0 + rol rc +.endm + +.macro Sbox i0, i1, i2, i3 + mov tmp0, \i1 + com \i0 + and \i1, \i0 + eor \i1, \i2 + or \i2, tmp0 + eor \i0, \i3 + eor \i2, \i0 + eor tmp0, \i3 + and \i0, \i1 + eor \i3, \i1 + eor \i0, tmp0 + and tmp0, \i2 + eor \i1, tmp0 +.endm + +.macro PUSH_CONFLICT + push r16 + push r17 + push r18 + push r19 + + push r23 + push r24 + + push r26 + push r27 + push r28 + push r29 + push r30 + push r31 +.endm + +.macro POP_CONFLICT + pop r31 + pop r30 + pop r29 + pop r28 + pop r27 + pop r26 + + pop r24 + pop r23 + + pop r19 + pop r18 + pop r17 + pop r16 +.endm + +.macro PUSH_ALL + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r28 + push r29 +.endm + +.macro POP_ALL + pop r29 + pop r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + clr r1 +.endm \ No newline at end of file diff --git a/knot/Implementations/crypto_aead/knot192/avr8_lowrom/config.h b/knot/Implementations/crypto_aead/knot192/avr8_lowrom/config.h new file mode 100644 index 0000000..173a40a --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/avr8_lowrom/config.h @@ -0,0 +1,131 @@ +#ifndef __CONFIG_H__ +#define __CONFIG_H__ + +#define CRYPTO_AEAD +//#define CRYPTO_HASH + +#define MAX_MESSAGE_LENGTH 128 + +#define STATE_INBITS 384 +/* For CRYPTO_AEAD */ +#define CRYPTO_KEYBITS 192 +/* For CRYPTO_HASH */ +#define CRYPTO_BITS 384 + +#define STATE_INBYTES ((STATE_INBITS + 7) / 8) +#define ROW_INBITS ((STATE_INBITS + 3) / 4) 
+#define ROW_INBYTES ((ROW_INBITS + 7) / 8) + +/* For CRYPTO_AEAD */ +#define CRYPTO_KEYBYTES ((CRYPTO_KEYBITS + 7) / 8) +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES CRYPTO_KEYBYTES +#define CRYPTO_ABYTES CRYPTO_KEYBYTES +#define CRYPTO_NOOVERLAP 1 + +#define MAX_ASSOCIATED_DATA_LENGTH 32 +#define MAX_CIPHER_LENGTH (MAX_MESSAGE_LENGTH + CRYPTO_ABYTES) + +#define TAG_MATCH 0 +#define TAG_UNMATCH -1 +#define OTHER_FAILURES -2 + +/* For CRYPTO_HASH */ +#define CRYPTO_BYTES ((CRYPTO_BITS + 7) / 8) + + + +#define DOMAIN_BITS 0x80 +#define PAD_BITS 0x01 +#define S384_R192_BITS 0x80 + +#if (STATE_INBITS==256) +#define C1 1 +#define C2 8 +#define C3 25 +#elif (STATE_INBITS==384) +#define C1 1 +#define C2 8 +#define C3 55 +#elif (STATE_INBITS==512) +#define C1 1 +#define C2 16 +#define C3 25 +#else +#error "Not specified state size" +#endif + +#ifdef CRYPTO_AEAD +/* For CRYPTO_AEAD */ +#define KEY_INBITS (CRYPTO_KEYBYTES * 8) +#define KEY_INBYTES (CRYPTO_KEYBYTES) + +#define NONCE_INBITS (CRYPTO_NPUBBYTES * 8) +#define NONCE_INBYTES (CRYPTO_NPUBBYTES) + +#define TAG_INBITS (CRYPTO_ABYTES * 8) +#define TAG_INBYTES (CRYPTO_ABYTES) + +#if (KEY_INBITS==128) && (STATE_INBITS==256) +#define RATE_INBITS 64 +#define NR_0 52 +#define NR_i 28 +#define NR_f 32 +#elif (KEY_INBITS==128) && (STATE_INBITS==384) +#define RATE_INBITS 192 +#define NR_0 76 +#define NR_i 28 +#define NR_f 32 +#elif (KEY_INBITS==192) && (STATE_INBITS==384) +#define RATE_INBITS 96 +#define NR_0 76 +#define NR_i 40 +#define NR_f 44 +#elif (KEY_INBITS==256) && (STATE_INBITS==512) +#define RATE_INBITS 128 +#define NR_0 100 +#define NR_i 52 +#define NR_f 56 +#else +#error "Not specified key size and state size" +#endif + +#define RATE_INBYTES ((RATE_INBITS + 7) / 8) +#define SQUEEZE_RATE_INBYTES TAG_INBYTES + +#endif + +#ifdef CRYPTO_HASH +/* For CRYPTO_HASH */ +#define HASH_DIGEST_INBITS (CRYPTO_BYTES * 8) + +#if (HASH_DIGEST_INBITS==256) && (STATE_INBITS==256) +#define HASH_RATE_INBITS 32 +#define 
HASH_SQUEEZE_RATE_INBITS 128 +#define NR_h 68 +#elif (HASH_DIGEST_INBITS==256) && (STATE_INBITS==384) +#define HASH_RATE_INBITS 128 +#define HASH_SQUEEZE_RATE_INBITS 128 +#define NR_h 80 +#elif (HASH_DIGEST_INBITS==384) && (STATE_INBITS==384) +#define HASH_RATE_INBITS 48 +#define HASH_SQUEEZE_RATE_INBITS 192 +#define NR_h 104 +#elif (HASH_DIGEST_INBITS==512) && (STATE_INBITS==512) +#define HASH_RATE_INBITS 64 +#define HASH_SQUEEZE_RATE_INBITS 256 +#define NR_h 140 +#else +#error "Not specified hash digest size and state size" +#endif + +#define HASH_RATE_INBYTES ((HASH_RATE_INBITS + 7) / 8) +#define HASH_SQUEEZE_RATE_INBYTES ((HASH_SQUEEZE_RATE_INBITS + 7) / 8) + +#endif + +#define TAG_MATCH 0 +#define TAG_UNMATCH -1 +#define OTHER_FAILURES -2 + +#endif \ No newline at end of file diff --git a/knot/Implementations/crypto_aead/knot192/avr8_lowrom/crypto_aead.h b/knot/Implementations/crypto_aead/knot192/avr8_lowrom/crypto_aead.h new file mode 100644 index 0000000..cd820d3 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/avr8_lowrom/crypto_aead.h @@ -0,0 +1,26 @@ +#ifdef __cplusplus +extern "C" { +#endif + +int crypto_aead_encrypt( + unsigned char *c,unsigned long long *clen, + const unsigned char *m,unsigned long long mlen, + const unsigned char *ad,unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k + ); + + +int crypto_aead_decrypt( + unsigned char *m,unsigned long long *outputmlen, + unsigned char *nsec, + const unsigned char *c,unsigned long long clen, + const unsigned char *ad,unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k + ); + +#ifdef __cplusplus +} +#endif diff --git a/knot/Implementations/crypto_aead/knot192/avr8_lowrom/encrypt.c b/knot/Implementations/crypto_aead/knot192/avr8_lowrom/encrypt.c new file mode 100644 index 0000000..baf0a3b --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/avr8_lowrom/encrypt.c @@ -0,0 +1,106 @@ +#include +#include 
+#include +#include +#include "config.h" + +extern void crypto_aead_encrypt_asm( + unsigned char *c, + const unsigned char *m, + unsigned char mlen, + const unsigned char *ad, + unsigned char adlen, + const unsigned char *npub, + const unsigned char *k + ); + +extern int crypto_aead_decrypt_asm( + unsigned char *m, + const unsigned char *c, + unsigned char clen, + const unsigned char *ad, + unsigned char adlen, + const unsigned char *npub, + const unsigned char *k + ); + +extern void crypto_hash_asm( + unsigned char *out, + const unsigned char *in, + unsigned char inlen + ); + + +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k + ) +{ + /* + ... + ... the code for the cipher implementation goes here, + ... generating a ciphertext c[0],c[1],...,c[*clen-1] + ... from a plaintext m[0],m[1],...,m[mlen-1] + ... and associated data ad[0],ad[1],...,ad[adlen-1] + ... and nonce npub[0],npub[1],.. + ... and secret key k[0],k[1],... + ... the implementation shall not use nsec + ... + ... return 0; + */ + + (void)nsec; + + crypto_aead_encrypt_asm(c, m, mlen, ad, adlen, npub, k); + + *clen = mlen + TAG_INBYTES; + return 0; +} + + + +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k + ) +{ + /* + ... + ... the code for the AEAD implementation goes here, + ... generating a plaintext m[0],m[1],...,m[*mlen-1] + ... and secret message number nsec[0],nsec[1],... + ... from a ciphertext c[0],c[1],...,c[clen-1] + ... and associated data ad[0],ad[1],...,ad[adlen-1] + ... and nonce number npub[0],npub[1],... + ... and secret key k[0],k[1],... + ... + ... 
return 0; + */ + unsigned long long mlen_; + unsigned char tag_is_match; + + (void)nsec; + if (clen < CRYPTO_ABYTES) { + return -1; + } + mlen_ = clen - CRYPTO_ABYTES; + + tag_is_match = crypto_aead_decrypt_asm(m, c, mlen_, ad, adlen, npub, k); + + if (tag_is_match != 0) + { + memset(m, 0, (size_t)mlen_); + return -1; + } + + *mlen = mlen_; + return 0; +} \ No newline at end of file diff --git a/knot/Implementations/crypto_aead/knot192/avr8_lowrom/encrypt_core.S b/knot/Implementations/crypto_aead/knot192/avr8_lowrom/encrypt_core.S new file mode 100644 index 0000000..cb7aed5 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/avr8_lowrom/encrypt_core.S @@ -0,0 +1,537 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.0 2020 by KNOT Team * +; ********************************************** +; + +; +; ============================================ +; S R A M D E F I N I T I O N S +; ============================================ +; +#include +#include "config.h" + +.section .noinit + SRAM_STATE: .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#if (STATE_INBYTES > 32) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif +#if (STATE_INBYTES > 48) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif + SRAM_MESSAGE_OUT_ADDR: .BYTE 0, 0 + SRAM_MESSAGE_IN_ADDR: .BYTE 0, 0 + SRAM_MESSAGE_IN_LEN: .BYTE 0, 0 +#ifdef CRYPTO_AEAD +; For CRYPTO_AEAD + SRAM_ASSOCIATED_DATA_ADDR: .BYTE 0, 0 + SRAM_ADLEN: .BYTE 0, 0 + SRAM_NONCE_ADDR: .BYTE 0, 0 + SRAM_KEY_ADDR: .BYTE 0, 0 + + SRAM_ADDITIONAL: + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#if (CRYPTO_ABYTES > 16) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif +#if (CRYPTO_ABYTES > 24) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif + 
+#endif + +.section .text + +#include "permutation.h" + +; require YH:YL be the address of the current associated data/cipher/message block +; for enc and dec, store ciphertext or plaintext +; require ZH:ZL be the address of the current cipher/message block +XOR_to_State: + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + mov cnt0, rate +XOR_to_State_loop: + ld tmp0, Y+ ; plaintext/ciphertext + ld tmp1, X ; state + eor tmp1, tmp0 ; ciphertext/plaintext + sbrc AEDH, 0 ; test auth or enc/dec, if AEDH[0] == 0, skip store result + st Z+, tmp1 ; store ciphertext/plaintext + sbrc AEDH, 1 ; test auth/enc or dec, if AEDH[1] == 0, skip repalce state byte + mov tmp1, tmp0 ; if dec, replace state + st X+, tmp1 ; store state byte + dec cnt0 + brne XOR_to_State_loop +; YH:YL are now the address of the next associated data block +ret + +; require YH:YL pointed to the input data +; require ZH:ZL pointed to the output data +; require cnt0 containes the nubmer of bytes in source data +; require number of bytes in source data less than rate, i.e., 0 <= cnt0 < rate +; +; the 0th bit in AEDH is used to distinguish (auth AD) or (enc/dec M/C): +; AEDH[0] = 0 for (auth AD), AEDH[0] = 1 for (enc/dec M/C) +; the 1th bit in AEDH is used to distinguish (auth AD/enc M) or (dec C): +; AEDH[1] = 0 for (auth AD/enc M), AEDH[1] = 1 for (dec C) +; AEDH = 0b000 for (auth AD) +; AEDH = 0b001 for (enc M) +; AEDH = 0b011 for (dec C) +Pad_XOR_to_State: + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + tst cnt0 + breq XOR_padded_data +XOR_source_data_loop: + ld tmp0, Y+ ; plaintext/ciphertext + ld tmp1, X ; state + eor tmp1, tmp0 ; ciphertext/plaintext + sbrc AEDH, 0 ; test auth or enc/dec, if AEDH[0] == 0, skip store result + st Z+, tmp1 ; store ciphertext/plaintext + sbrc AEDH, 1 ; test auth/enc or dec, if AEDH[1] == 0, skip repalce state byte + mov tmp1, tmp0 ; if dec, replace state + st X+, tmp1 ; store state byte + dec cnt0 + brne XOR_source_data_loop +XOR_padded_data: + ldi tmp0, PAD_BITS + 
ld tmp1, X + eor tmp1, tmp0 + st X, tmp1 +ret + +AddDomain: + ldi XH, hi8(SRAM_STATE + STATE_INBYTES - 1) + ldi XL, lo8(SRAM_STATE + STATE_INBYTES - 1) + ldi tmp0, DOMAIN_BITS + ld tmp1, X + eor tmp0, tmp1 + st X, tmp0 +ret + +; require ZH:ZL be the address of the destination +EXTRACT_from_State: + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + mov tmp1, rate +EXTRACT_from_State_loop: + ld tmp0, X+ + st Z+, tmp0 + dec tmp1 + brne EXTRACT_from_State_loop +ret + +AUTH: + tst radlen + breq AUTH_end + + cp radlen, rate + brlo auth_ad_padded_block + +auth_ad_loop: + rcall XOR_to_State + rcall Permutation + sub radlen, rate + cp radlen, rate + brlo auth_ad_padded_block + rjmp auth_ad_loop + +auth_ad_padded_block: + mov cnt0, radlen + rcall Pad_XOR_to_State + rcall Permutation + +AUTH_end: +ret + +#ifdef CRYPTO_AEAD +Initialization: + ldi rn, NR_0 + ldi XL, lo8(SRAM_STATE) + ldi XH, hi8(SRAM_STATE) + + lds YH, SRAM_NONCE_ADDR + lds YL, SRAM_NONCE_ADDR + 1 + ldi cnt0, CRYPTO_NPUBBYTES +load_nonce_loop: + ld tmp0, Y+ + st X+, tmp0 + dec cnt0 + brne load_nonce_loop + + lds YH, SRAM_KEY_ADDR + lds YL, SRAM_KEY_ADDR + 1 + ldi cnt0, CRYPTO_KEYBYTES +load_key_loop: + ld tmp0, Y+ + st X+, tmp0 + dec cnt0 + brne load_key_loop + +#if (STATE_INBITS==384) && (RATE_INBITS==192) + ldi cnt0, (STATE_INBYTES - CRYPTO_NPUBBYTES - CRYPTO_KEYBYTES - 1) + clr tmp0 +empty_state_loop: + st X+, tmp0 + dec cnt0 + brne empty_state_loop + ldi tmp0, S384_R192_BITS + st X+, tmp0 +#endif + + rcall Permutation +ret + +ENC: + tst mclen + breq ENC_end + + cp mclen, rate + brlo enc_padded_block + +enc_loop: + rcall XOR_to_State + ldi rn, NR_i + rcall Permutation + sub mclen, rate + cp mclen, rate + brlo enc_padded_block + rjmp enc_loop + +enc_padded_block: + mov cnt0, mclen + rcall Pad_XOR_to_State +ENC_end: +ret + +Finalization: + ldi rate, SQUEEZE_RATE_INBYTES + ldi rn, NR_f + rcall Permutation + rcall EXTRACT_from_State +ret + +; void crypto_aead_encrypt_asm( +; unsigned char *c, +; const unsigned 
char *m,
+; unsigned char mlen,
+; const unsigned char *ad,
+; unsigned char adlen,
+; const unsigned char *npub,
+; const unsigned char *k
+; )
+;
+; Lengths are 8-bit: the extern declaration in encrypt.c uses unsigned char,
+; and only r20 / r16 are read below.
+;
+; unsigned char *c, is passed in r24:r25
+; const unsigned char *m, is passed in r22:r23
+; unsigned char mlen, is passed in r20; r21 is stored alongside it but only r20 is used
+; const unsigned char *ad, is passed in r18:r19
+; unsigned char adlen, is passed in r16; r17 is stored alongside it but only r16 is used
+; const unsigned char *npub, is passed in r14:r15
+; const unsigned char *k is passed in r12:r13
+;
+; Flow: save every argument register to the SRAM block that starts at
+; SRAM_MESSAGE_OUT_ADDR, run Initialization, absorb the associated data
+; (AUTH), AddDomain, process the message (ENC), then Finalization.
+; Z is not reloaded between ENC and Finalization, so the tag is written
+; wherever ENC leaves Z.
+; NOTE(review): presumably that is directly after the ciphertext -- confirm
+; in XOR_to_State / Pad_XOR_to_State, which are outside this section.
+.global crypto_aead_encrypt_asm
+crypto_aead_encrypt_asm:
+    PUSH_ALL
+    ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
+    ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
+    ; pointers are stored high byte first; the lds pairs below rely on that
+    st X+, r25                  ;store cipher address in SRAM_MESSAGE_OUT_ADDR
+    st X+, r24
+    st X+, r23                  ;store message address in SRAM_MESSAGE_IN_ADDR
+    st X+, r22
+    st X+, r21                  ;store message length in SRAM_MESSAGE_IN_LEN
+    st X+, r20
+    st X+, r19                  ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR
+    st X+, r18
+    st X+, r17                  ;store associated data length in SRAM_ADLEN
+    st X+, r16
+    st X+, r15                  ;store nonce address in SRAM_NONCE_ADDR
+    st X+, r14
+    st X+, r13                  ;store key address in SRAM_KEY_ADDR
+    st X+, r12
+    ; order matters: permutation.h maps radlen to r17 and mclen to r16, so
+    ; adlen (r16) must be copied out before r16 is overwritten with mlen
+    mov radlen, r16
+    mov mclen, r20
+
+    rcall Initialization
+
+    ldi rn, NR_i
+    ldi rate, RATE_INBYTES
+    ldi AEDH, 0b000 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
+    lds YH, SRAM_ASSOCIATED_DATA_ADDR
+    lds YL, SRAM_ASSOCIATED_DATA_ADDR + 1
+    rcall AUTH
+    rcall AddDomain
+    ldi AEDH, 0b001 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
+    lds YH, SRAM_MESSAGE_IN_ADDR
+    lds YL, SRAM_MESSAGE_IN_ADDR + 1
+    lds ZH, SRAM_MESSAGE_OUT_ADDR
+    lds ZL, SRAM_MESSAGE_OUT_ADDR + 1
+    rcall ENC
+    rcall Finalization
+    POP_ALL
+ret
+
+; int crypto_aead_decrypt_asm(
+; unsigned char *m,
+; const unsigned char *c,
+; unsigned long long clen,
+; const unsigned char *ad,
+; unsigned long long adlen,
+; const unsigned char *npub,
+; const unsigned char *k
+; )
+;
+; unsigned char *m, is passed in r24:r25
+; const unsigned char *c, is passed in r22:r23
+; unsigned long long clen, is passed in r20:r21, only LSB (r20) is used
+; const unsigned char *ad, is passed in r18:r19
+; unsigned long long adlen, is passed in r16:r17, only LSB (r16) is used
+; const unsigned char *npub, is passed in r14:r15
+; const unsigned char *k is passed in r12:r13
+;
+; NOTE: the extern declaration in encrypt.c uses unsigned char for both
+; lengths, and the caller there passes the ciphertext length without the tag.
+;
+; Returns r25:r24 = 0x0000 when the recomputed tag equals the CRYPTO_ABYTES
+; bytes that follow the processed ciphertext, 0xFFFF otherwise.
+; Flow: same as encryption (Initialization, AUTH, AddDomain, ENC with
+; AEDH = 0b011), then Finalization writes the recomputed tag to
+; SRAM_ADDITIONAL and it is compared with the received tag read through Y.
+.global crypto_aead_decrypt_asm
+crypto_aead_decrypt_asm:
+    PUSH_ALL
+    ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
+    ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
+    st X+, r25                  ;store message address in SRAM_MESSAGE_OUT_ADDR
+    st X+, r24
+    st X+, r23                  ;store cipher address in SRAM_MESSAGE_IN_ADDR
+    st X+, r22
+    st X+, r21                  ;store cipher length in SRAM_MESSAGE_IN_LEN
+    st X+, r20
+    st X+, r19                  ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR
+    st X+, r18
+    st X+, r17                  ;store associated data length in SRAM_ADLEN
+    st X+, r16
+    st X+, r15                  ;store nonce address in SRAM_NONCE_ADDR
+    st X+, r14
+    st X+, r13                  ;store key address in SRAM_KEY_ADDR
+    st X+, r12
+    ; order matters: radlen is r17 and mclen is r16 (see permutation.h)
+    mov radlen, r16
+    mov mclen, r20
+
+    rcall Initialization
+
+    ldi rn, NR_i
+    ldi rate, RATE_INBYTES
+    ldi AEDH, 0b000 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
+    lds YH, SRAM_ASSOCIATED_DATA_ADDR
+    lds YL, SRAM_ASSOCIATED_DATA_ADDR + 1
+    rcall AUTH
+    rcall AddDomain
+    ldi AEDH, 0b011 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
+    lds YH, SRAM_MESSAGE_IN_ADDR
+    lds YL, SRAM_MESSAGE_IN_ADDR + 1
+    lds ZH, SRAM_MESSAGE_OUT_ADDR
+    lds ZL, SRAM_MESSAGE_OUT_ADDR + 1
+    rcall ENC
+
+    ; the recomputed tag goes to the scratch buffer, not to the caller's output
+    ldi ZH, hi8(SRAM_ADDITIONAL)
+    ldi ZL, lo8(SRAM_ADDITIONAL)
+    rcall Finalization
+
+    ; Finalization advanced Z past the tag; rewind to its first byte
+    sbiw ZL, CRYPTO_ABYTES
+    ldi cnt0, CRYPTO_ABYTES
+    ; Constant-time comparison: always walk all CRYPTO_ABYTES bytes and OR
+    ; the byte differences into r24, so the running time does not reveal how
+    ; many leading tag bytes matched. r24 (rate) is free after Finalization
+    ; and is overwritten with the return value below anyway.
+    clr r24
+compare_tag:
+    ld tmp0, Z+                 ; recomputed tag byte
+    ld tmp1, Y+                 ; received tag byte
+    eor tmp0, tmp1              ; zero iff the two bytes are equal
+    or r24, tmp0
+    dec cnt0
+    brne compare_tag
+    tst r24                     ; zero iff every byte matched
+    breq return_tag_match
+
+return_tag_not_match:
+    ldi r25, 0xFF
+    ldi r24, 0xFF
+    rjmp crypto_aead_decrypt_end
+
+return_tag_match:
+    clr r25
+    clr r24
+crypto_aead_decrypt_end:
+    POP_ALL
+ret
+
+; #ifdef CRYPTO_AEAD
+#endif
+
+
+#ifdef CRYPTO_HASH
+
+; void crypto_hash_asm(
+; unsigned char *out,
+; const unsigned char *in,
+; unsigned char inlen
+; )
+;
+; The length is 8-bit: the extern declaration in encrypt.c uses unsigned
+; char, and only r20 is read below.
+;
+; unsigned char *out, is passed in r24:r25
+; const unsigned char *in, is passed in r22:r23
+; unsigned char inlen, is passed in r20; r21 is stored alongside it but only r20 is used
+;
+; Flow: zero the state, absorb the input with AUTH at HASH_RATE_INBYTES per
+; block (or only XOR the padding byte when the input is empty), then squeeze
+; CRYPTO_BYTES bytes to `out`, HASH_SQUEEZE_RATE_INBYTES per Permutation.
+.global crypto_hash_asm
+crypto_hash_asm:
+    PUSH_ALL
+    ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
+    ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
+    st X+, r25                  ;store digest (out) address in SRAM_MESSAGE_OUT_ADDR
+    st X+, r24
+    st X+, r23                  ;store input (in) address in SRAM_MESSAGE_IN_ADDR
+    st X+, r22
+    st X+, r21                  ;store input length in SRAM_MESSAGE_IN_LEN
+    st X+, r20
+    mov mclen, r20
+
+    ldi XH, hi8(SRAM_STATE)
+    ldi XL, lo8(SRAM_STATE)
+#if (STATE_INBITS==384) && (HASH_RATE_INBITS==128)
+    ldi cnt0, STATE_INBYTES - 1 ; last byte is written separately below
+#else
+    ldi cnt0, STATE_INBYTES
+#endif
+    clr tmp0
+zero_state:
+    st X+, tmp0
+    dec cnt0
+    brne zero_state
+
+#if (STATE_INBITS==384) && (HASH_RATE_INBITS==128)
+    ldi tmp0, S384_R192_BITS    ; last state byte for this configuration
+    st X+, tmp0
+#endif
+
+    ldi rn, NR_h
+    ldi AEDH, 0b100             ; bit 2 set marks hash mode (see permutation.h)
+
+HASH_ABSORBING:
+    mov radlen, mclen           ; AUTH counts its input in radlen
+    tst radlen
+    breq EMPTY_M
+    ldi rate, HASH_RATE_INBYTES
+    lds YH, SRAM_MESSAGE_IN_ADDR
+    lds YL, SRAM_MESSAGE_IN_ADDR + 1
+    rcall AUTH
+    rjmp HASH_SQUEEZING
+
+EMPTY_M:
+    ; empty input: AUTH would return without doing anything, so XOR PAD_BITS
+    ; into the first state byte and permute here
+    ldi XH, hi8(SRAM_STATE)
+    ldi XL, lo8(SRAM_STATE)
+    ldi tmp0, PAD_BITS
+    ld tmp1, X
+    eor tmp1, tmp0
+    st X, tmp1
+    rcall Permutation
+
+HASH_SQUEEZING:
+    ldi rate, HASH_SQUEEZE_RATE_INBYTES
+    lds ZH, SRAM_MESSAGE_OUT_ADDR
+    lds ZL, SRAM_MESSAGE_OUT_ADDR + 1
+    ldi tcnt, CRYPTO_BYTES      ; digest bytes still to produce (tcnt shares r17 with radlen)
+SQUEEZING_loop:
+    rcall EXTRACT_from_State
+    subi tcnt, HASH_SQUEEZE_RATE_INBYTES
+    breq HASH_SQUEEZING_end     ; exits only when the count reaches exactly zero
+    rcall Permutation
+    rjmp SQUEEZING_loop
+HASH_SQUEEZING_end:
+    POP_ALL
+ret
+
+#endif
+
+
+; Byte Order In AVR 8:
+; KNOT-AEAD(128, 256, 64):
+; N[ 0] AEAD_State[ 0] | Message[ 0] Perm_row_0[0] 0 Tag[ 0]
+; N[ 1] AEAD_State[ 1] | Message[ 1] Perm_row_0[1] 0 Tag[ 1]
+; N[
2] AEAD_State[ 2] | Message[ 2] Perm_row_0[2] 0 Tag[ 2] +; N[ 3] AEAD_State[ 3] | Message[ 3] Perm_row_0[3] 0 Tag[ 3] +; N[ 4] AEAD_State[ 4] | Message[ 4] 0x01 Perm_row_0[4] 0 Tag[ 4] +; N[ 5] AEAD_State[ 5] | Message[ 5] 0x00 Perm_row_0[5] 0 Tag[ 5] +; N[ 6] AEAD_State[ 6] | Message[ 6] 0x00 Perm_row_0[6] 0 Tag[ 6] +; N[ 7] AEAD_State[ 7] | Message[ 7] 0x00 Perm_row_0[7] <<< 0 Tag[ 7] +; N[ 8] AEAD_State[ 8] | Perm_row_1[0] 1 +; N[ 9] AEAD_State[ 9] | Perm_row_1[1] 1 +; N[10] AEAD_State[10] | Perm_row_1[2] 1 +; N[11] AEAD_State[11] | Perm_row_1[3] 1 +; N[12] AEAD_State[12] | Perm_row_1[4] 1 +; N[13] AEAD_State[13] | Perm_row_1[5] 1 +; N[14] AEAD_State[14] | Perm_row_1[6] 1 +; N[15] AEAD_State[15] | Perm_row_1[7] <<< 1 +; K[ 0] AEAD_State[16] | Perm_row_2[0] 8 +; K[ 1] AEAD_State[17] | Perm_row_2[1] 8 +; K[ 2] AEAD_State[18] | Perm_row_2[2] 8 +; K[ 3] AEAD_State[19] | Perm_row_2[3] 8 +; K[ 4] AEAD_State[20] | Perm_row_2[4] 8 +; K[ 5] AEAD_State[21] | Perm_row_2[5] 8 +; K[ 6] AEAD_State[22] | Perm_row_2[6] 8 +; K[ 7] AEAD_State[23] | Perm_row_2[7] <<< 8 +; K[ 8] AEAD_State[24] | Perm_row_3[0] 25 +; K[ 9] AEAD_State[25] | Perm_row_3[1] 25 +; K[10] AEAD_State[26] | Perm_row_3[2] 25 +; K[11] AEAD_State[27] | Perm_row_3[3] 25 +; K[12] AEAD_State[28] | Perm_row_3[4] 25 +; K[13] AEAD_State[29] | Perm_row_3[5] 25 +; K[14] AEAD_State[30] | Perm_row_3[6] 25 +; K[15] AEAD_State[31] | ^0x80 Perm_row_3[7] <<< 25 +; +; +; KNOT-AEAD(128, 384, 192): +; Initalization +; N[ 0] AEAD_State[ 0] | Message[ 0] Perm_row_0[ 0] 0 Tag[ 0] +; N[ 1] AEAD_State[ 1] | Message[ 1] Perm_row_0[ 1] 0 Tag[ 1] +; N[ 2] AEAD_State[ 2] | Message[ 2] Perm_row_0[ 2] 0 Tag[ 2] +; N[ 3] AEAD_State[ 3] | Message[ 3] Perm_row_0[ 3] 0 Tag[ 3] +; N[ 4] AEAD_State[ 4] | Message[ 4] 0x01 Perm_row_0[ 4] 0 Tag[ 4] +; N[ 5] AEAD_State[ 5] | Message[ 5] 0x00 Perm_row_0[ 5] 0 Tag[ 5] +; N[ 6] AEAD_State[ 6] | Message[ 6] 0x00 Perm_row_0[ 6] 0 Tag[ 6] +; N[ 7] AEAD_State[ 7] | Message[ 7] 0x00 Perm_row_0[ 7] 0 Tag[ 7] 
+; N[ 8] AEAD_State[ 8] | Message[ 8] 0x00 Perm_row_0[ 8] 0 Tag[ 8] +; N[ 9] AEAD_State[ 9] | Message[ 9] 0x00 Perm_row_0[ 9] 0 Tag[ 9] +; N[10] AEAD_State[10] | Message[10] 0x00 Perm_row_0[10] 0 Tag[10] +; N[11] AEAD_State[11] | Message[11] 0x00 Perm_row_0[11] <<< 0 Tag[11] +; N[12] AEAD_State[12] | Message[12] 0x00 Perm_row_1[ 0] 1 Tag[12] +; N[13] AEAD_State[13] | Message[13] 0x00 Perm_row_1[ 1] 1 Tag[13] +; N[14] AEAD_State[14] | Message[14] 0x00 Perm_row_1[ 2] 1 Tag[14] +; N[15] AEAD_State[15] | Message[15] 0x00 Perm_row_1[ 3] 1 Tag[15] +; K[ 0] AEAD_State[16] | Message[16] 0x00 Perm_row_1[ 4] 1 +; K[ 1] AEAD_State[17] | Message[17] 0x00 Perm_row_1[ 5] 1 +; K[ 2] AEAD_State[18] | Message[18] 0x00 Perm_row_1[ 6] 1 +; K[ 3] AEAD_State[19] | Message[19] 0x00 Perm_row_1[ 7] 1 +; K[ 4] AEAD_State[20] | Message[20] 0x00 Perm_row_1[ 8] 1 +; K[ 5] AEAD_State[21] | Message[21] 0x00 Perm_row_1[ 9] 1 +; K[ 6] AEAD_State[22] | Message[22] 0x00 Perm_row_1[10] 1 +; K[ 7] AEAD_State[23] | Message[23] 0x00 Perm_row_1[11] <<< 1 +; K[ 8] AEAD_State[24] | Perm_row_2[ 0] 8 +; K[ 9] AEAD_State[25] | Perm_row_2[ 1] 8 +; K[10] AEAD_State[26] | Perm_row_2[ 2] 8 +; K[11] AEAD_State[27] | Perm_row_2[ 3] 8 +; K[12] AEAD_State[28] | Perm_row_2[ 4] 8 +; K[13] AEAD_State[29] | Perm_row_2[ 5] 8 +; K[14] AEAD_State[30] | Perm_row_2[ 6] 8 +; K[15] AEAD_State[31] | Perm_row_2[ 7] 8 +; 0x00 AEAD_State[32] | Perm_row_2[ 8] 8 +; 0x00 AEAD_State[33] | Perm_row_2[ 9] 8 +; 0x00 AEAD_State[34] | Perm_row_2[10] 8 +; 0x00 AEAD_State[35] | Perm_row_2[11] <<< 8 +; 0x00 AEAD_State[36] | Perm_row_3[ 0] 55 +; 0x00 AEAD_State[37] | Perm_row_3[ 1] 55 +; 0x00 AEAD_State[38] | Perm_row_3[ 2] 55 +; 0x00 AEAD_State[39] | Perm_row_3[ 3] 55 +; 0x00 AEAD_State[40] | Perm_row_3[ 4] 55 +; 0x00 AEAD_State[41] | Perm_row_3[ 5] 55 +; 0x00 AEAD_State[42] | Perm_row_3[ 6] 55 +; 0x00 AEAD_State[43] | Perm_row_3[ 7] 55 +; 0x00 AEAD_State[44] | Perm_row_3[ 8] 55 +; 0x00 AEAD_State[45] | Perm_row_3[ 9] 55 +; 0x00 
AEAD_State[46] | Perm_row_3[10] 55 +; 0x00 ^0x80 AEAD_State[47] | ^0x80 Perm_row_3[11] <<< 55 diff --git a/knot/Implementations/crypto_aead/knot192/avr8_lowrom/knot256.h b/knot/Implementations/crypto_aead/knot192/avr8_lowrom/knot256.h new file mode 100644 index 0000000..d16bf8c --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/avr8_lowrom/knot256.h @@ -0,0 +1,197 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.0 2020 by KNOT Team * +; ********************************************** +; +#include "assist.h" + +Permutation: + PUSH_CONFLICT + mov rcnt, rn + + ldi rc, 0x01 + ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES) + ld x30, Y+ + ld x31, Y+ + ld x32, Y+ + ld x33, Y+ + ld x34, Y+ + ld x35, Y+ + ld x36, Y+ + ld x37, Y+ + +round_loop_start: + rjmp AddRC_SubColumns_Start + +load_columns_table: + rjmp load_column0 + rjmp load_column1 + rjmp load_column2 + rjmp load_column3 + rjmp load_column4 + rjmp load_column5 + rjmp load_column6 + rjmp load_column7 + rjmp amend_shiftRow + +load_column0: + mov x3j, x30 + rjmp Sbox_one_column +load_column1: + mov x30, x3j + mov x3j, x31 + rjmp Sbox_one_column +load_column2: + mov x31, x3j + mov x3j, x32 + rjmp Sbox_one_column +load_column3: + mov x32, x3j + mov x3j, x33 + rjmp Sbox_one_column +load_column4: + mov x33, x3j + mov x3j, x34 + rjmp Sbox_one_column +load_column5: + mov x34, x3j + mov x3j, x35 + rjmp Sbox_one_column +load_column6: + mov x35, x3j + mov x3j, x36 + rjmp Sbox_one_column +load_column7: + mov x36, x3j + mov x3j, x37 + rjmp Sbox_one_column + +#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) +LFSR_table: + rjmp LFSR6 + rjmp LFSR7 +LFSR6: + LFSR6_MACRO + rjmp LFSR_DONE +LFSR7: + LFSR7_MACRO + rjmp LFSR_DONE +#endif + +;;;;;;;;;;;;;;;;;;;;;;;; Real Start +AddRC_SubColumns_Start: 
+    ; Start of one round: Y walks the state column by column (one byte of
+    ; each row per column), ccnt counts the columns already processed.
+    ldi YH, hi8(SRAM_STATE)
+    ldi YL, lo8(SRAM_STATE)
+    clr ccnt
+    ld x0j, Y
+    eor x0j, rc                 ; AddRoundConstant on the first byte of row 0
+
+#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
+    ldi ZL, pm_lo8(LFSR_table)
+    ldi ZH, pm_hi8(LFSR_table)
+    sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[2] = 1 for HASH
+    adiw ZL, 1
+    ijmp
+LFSR_DONE:
+#elif defined(CRYPTO_AEAD)
+    LFSR6_MACRO ; only AEAD
+#else
+    LFSR7_MACRO ; only HASH
+#endif
+
+    ldd x1j, Y + ROW_INBYTES
+    ldd x2j, Y + 2 * ROW_INBYTES
+    ; Z indexes load_columns_table; each column advances it by one entry and
+    ; the entry after the last column is amend_shiftRow
+    ldi ZL, pm_lo8(load_columns_table)
+    ldi ZH, pm_hi8(load_columns_table)
+    ijmp
+Sbox_one_column:
+    Sbox x0j, x1j, x2j, x3j
+
+    ; 7 6 5 4 3 2 1 0
+    ; -- -- -- -- -- -- -- x- 0
+    ; -- -- -- -- -- -- -- x' 0
+    ; -- -- -- -- -- -- x- -- 1
+    ; -- -- -- -- x' -- -- -- 3
+    ; 4 3 2 1 0 7 6 5
+    ; Store a byte to Row 0
+    st Y, x0j
+    ; Store a byte combined with ShiftRow1
+    lsl t1j                     ; carry = top bit of the previous column's row-1 byte
+    mov t1j, x1j ; back up the last updated byte in t1j, to be used in shiftRow1 (1 bit left)
+    rol x1j
+    std Y + ROW_INBYTES, x1j
+    ; Store a byte combined with ShiftRow2
+    inc ccnt
+    cpi ccnt, ROW_INBYTES
+    breq ROW2_WRAP
+    ldd t2j, Y + 2 * ROW_INBYTES + 1 ; load next byte, the last updated byte needed to be shifted to the address of the next bytes
+    std Y + 2 * ROW_INBYTES + 1, x2j
+    mov x2j, t2j
+    ; rjmp instead of jmp: the target is a few words away, every other branch
+    ; in this file is relative, and jmp is a two-word instruction that is not
+    ; available on all AVR devices
+    rjmp NO_ROW2_WRAP
+ROW2_WRAP:
+    std Y + ROW_INBYTES + 1, x2j ; last column wraps around to byte 0 of row 2
+    ; remain ShiftRow3 to be done at 'amend_shiftRow'
+NO_ROW2_WRAP:
+    adiw YL, 1
+    ld x0j, Y
+    ldd x1j, Y + ROW_INBYTES
+
+    adiw ZL, 1
+    ijmp
+
+amend_shiftRow:
+    ldi YH, hi8(SRAM_STATE + ROW_INBYTES)
+    ldi YL, lo8(SRAM_STATE + ROW_INBYTES)
+
+    ; close the 1-bit rotation of row 1: the top bit of its last byte becomes
+    ; bit 0 of its first byte
+    ld x1j, Y
+    bst t1j, 7
+    bld x1j, 0
+    st Y, x1j
+
+    ; <<< 1
+    mov x37, x3j
+    rol x3j                     ; only used to move the top bit of x37 into carry
+    rol x30
+    rol x31
+    rol x32
+    rol x33
+    rol x34
+    rol x35
+    rol x36
+    rol x37
+    ; <<< 24
+    ; 7 6 5 4 3 2 1 0 => 4 3 2 1 0 7 6 5
+    mov x3j, x30
+    mov x30, x35
+    mov x35, x32
+    mov x32, x37
+    mov x37, x34
+    mov x34, x31
+    mov x31, x36
+    mov x36, x33
+    mov x33, x3j
+
+    dec rcnt
+    breq round_loop_end
+    rjmp round_loop_start
+
+round_loop_end:
+    ldi YH, hi8(SRAM_STATE + 3 *
ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES) + st Y+, x30 + st Y+, x31 + st Y+, x32 + st Y+, x33 + st Y+, x34 + st Y+, x35 + st Y+, x36 + st Y+, x37 + + POP_CONFLICT +ret \ No newline at end of file diff --git a/knot/Implementations/crypto_aead/knot192/avr8_lowrom/knot384.h b/knot/Implementations/crypto_aead/knot192/avr8_lowrom/knot384.h new file mode 100644 index 0000000..65c474a --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/avr8_lowrom/knot384.h @@ -0,0 +1,219 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.0 2020 by KNOT Team * +; ********************************************** +; +#include "assist.h" + +Permutation: + PUSH_CONFLICT + mov rcnt, rn + + ldi rc, 0x01 + ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES) + ld x30, Y+ + ld x31, Y+ + ld x32, Y+ + ld x33, Y+ + ld x34, Y+ + ld x35, Y+ + ld x36, Y+ + ld x37, Y+ + ld x38, Y+ + ld x39, Y+ + ld x3a, Y+ + ld x3b, Y+ + +round_loop_start: + rjmp AddRC_SubColumns_Start + +load_columns_table: + rjmp load_column0 + rjmp load_column1 + rjmp load_column2 + rjmp load_column3 + rjmp load_column4 + rjmp load_column5 + rjmp load_column6 + rjmp load_column7 + rjmp load_column8 + rjmp load_column9 + rjmp load_columna + rjmp load_columnb + rjmp amend_shiftRow + +load_column0: + mov x3j, x30 + rjmp Sbox_one_column +load_column1: + mov x30, x3j + mov x3j, x31 + rjmp Sbox_one_column +load_column2: + mov x31, x3j + mov x3j, x32 + rjmp Sbox_one_column +load_column3: + mov x32, x3j + mov x3j, x33 + rjmp Sbox_one_column +load_column4: + mov x33, x3j + mov x3j, x34 + rjmp Sbox_one_column +load_column5: + mov x34, x3j + mov x3j, x35 + rjmp Sbox_one_column +load_column6: + mov x35, x3j + mov x3j, x36 + rjmp Sbox_one_column +load_column7: + mov x36, x3j + mov x3j, x37 + rjmp 
Sbox_one_column
+load_column8:
+    mov x37, x3j
+    mov x3j, x38
+    rjmp Sbox_one_column
+load_column9:
+    mov x38, x3j
+    mov x3j, x39
+    rjmp Sbox_one_column
+load_columna:
+    mov x39, x3j
+    mov x3j, x3a
+    rjmp Sbox_one_column
+load_columnb:
+    mov x3a, x3j
+    mov x3j, x3b
+    rjmp Sbox_one_column
+
+;;;;;;;;;;;;;;;;;;;;;;;; Real Start
+; Start of one round: Y walks the state column by column, Z indexes
+; load_columns_table (its last entry is amend_shiftRow), ccnt counts the
+; columns already processed.
+AddRC_SubColumns_Start:
+    ldi YH, hi8(SRAM_STATE)
+    ldi YL, lo8(SRAM_STATE)
+    ldi ZL, pm_lo8(load_columns_table)
+    ldi ZH, pm_hi8(load_columns_table)
+    clr ccnt
+    ld x0j, Y
+    eor x0j, rc                 ; AddRoundConstant on the first byte of row 0
+    LFSR7_MACRO
+
+    ldd x1j, Y + ROW_INBYTES
+    ldd x2j, Y + 2 * ROW_INBYTES
+    ijmp
+Sbox_one_column:
+    Sbox x0j, x1j, x2j, x3j
+
+    ; b a 9 8 7 6 5 4 3 2 1 0
+    ; -- -- -- -- -- -- -- -- -- -- -- x- 0
+    ; -- -- -- -- -- -- -- -- -- -- -- x' 0
+    ; -- -- -- -- -- -- -- -- -- -- x- -- 1
+    ; -- -- -- -- x' -- -- -- -- -- -- -- 7
+    ; 4 3 2 1 0 b a 9 8 7 6 5
+    ; Store a byte to Row 0
+    st Y, x0j
+    ; Store a byte combined with ShiftRow 1
+    lsl t1j                     ; carry = top bit of the previous column's row-1 byte
+    mov t1j, x1j ; back up the last updated byte in t1j, to be used in shiftRow1 (1 bit left)
+    rol x1j
+    std Y + ROW_INBYTES, x1j
+    ; Store a byte combined with ShiftRow 2
+    inc ccnt
+    cpi ccnt, ROW_INBYTES
+    breq ROW2_WRAP
+    ldd t2j, Y + 2 * ROW_INBYTES + 1 ; load next byte, the last updated byte needed to be shifted to the address of the next bytes
+    std Y + 2 * ROW_INBYTES + 1, x2j
+    mov x2j, t2j
+    ; rjmp instead of jmp: the target is a few words away, every other branch
+    ; in this file is relative, and jmp is a two-word instruction that is not
+    ; available on all AVR devices
+    rjmp NO_ROW2_WRAP
+ROW2_WRAP:
+    std Y + ROW_INBYTES + 1, x2j ; last column wraps around to byte 0 of row 2
+    ; remain ShiftRow3 to be done at 'amend_shiftRow'
+NO_ROW2_WRAP:
+    adiw YL, 1
+    ld x0j, Y
+    ldd x1j, Y + ROW_INBYTES
+
+    adiw ZL, 1
+    ijmp
+
+amend_shiftRow:
+    ldi YH, hi8(SRAM_STATE + ROW_INBYTES)
+    ldi YL, lo8(SRAM_STATE + ROW_INBYTES)
+
+    ; close the 1-bit rotation of row 1: the top bit of its last byte becomes
+    ; bit 0 of its first byte
+    ld x1j, Y
+    bst t1j, 7
+    bld x1j, 0
+    st Y, x1j
+
+    ; >>> 1
+    mov x3b, x3j
+    ror x3j                     ; only used to move the low bit of x3b into carry
+    ror x3a
+    ror x39
+    ror x38
+    ror x37
+    ror x36
+    ror x35
+    ror x34
+    ror x33
+    ror x32
+    ror x31
+    ror x30
+    ror x3b
+    ; <<< 56
+    ; b a 9 8 7 6 5 4 3 2 1 0 => 4 3 2 1 0 b a 9 8 7 6 5
+    ;mov x3j, x30
+    ;mov x30, x35
+    ;mov x35, x32
+    ;mov x32, x37
+ ;mov x37, x34 + ;mov x34, x31 + ;mov x31, x36 + ;mov x36, x33 + ;mov x33, x3j + mov x3j, x30 + mov x30, x35 + mov x35, x3a + mov x3a, x33 + mov x33, x38 + mov x38, x31 + mov x31, x36 + mov x36, x3b + mov x3b, x34 + mov x34, x39 + mov x39, x32 + mov x32, x37 + mov x37, x3j + + dec rcnt + breq round_loop_end + rjmp round_loop_start + +round_loop_end: + + ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES) + st Y+, x30 + st Y+, x31 + st Y+, x32 + st Y+, x33 + st Y+, x34 + st Y+, x35 + st Y+, x36 + st Y+, x37 + st Y+, x38 + st Y+, x39 + st Y+, x3a + st Y+, x3b + + POP_CONFLICT +ret \ No newline at end of file diff --git a/knot/Implementations/crypto_aead/knot192/avr8_lowrom/knot512.h b/knot/Implementations/crypto_aead/knot192/avr8_lowrom/knot512.h new file mode 100644 index 0000000..d24b353 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/avr8_lowrom/knot512.h @@ -0,0 +1,275 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.0 2020 by KNOT Team * +; ********************************************** +; +#include "assist.h" + +Permutation: + PUSH_CONFLICT + mov rcnt, rn + + ldi rc, 0x01 + ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES) + ld x30, Y+ + ld x31, Y+ + ld x32, Y+ + ld x33, Y+ + ld x34, Y+ + ld x35, Y+ + ld x36, Y+ + ld x37, Y+ + ld x38, Y+ + ld x39, Y+ + ld x3a, Y+ + ld x3b, Y+ + ld x3c, Y+ + ld x3d, Y+ + ld x3e, Y+ + ld x3f, Y+ + +round_loop_start: + rjmp AddRC_SubColumns_Start + +load_columns_table: + rjmp load_column0 + rjmp load_column1 + rjmp load_column2 + rjmp load_column3 + rjmp load_column4 + rjmp load_column5 + rjmp load_column6 + rjmp load_column7 + rjmp load_column8 + rjmp load_column9 + rjmp load_columna + rjmp load_columnb + rjmp load_columnc + rjmp load_columnd + rjmp 
load_columne + rjmp load_columnf + rjmp amend_shiftRow + +load_column0: + mov x3j, x30 + rjmp Sbox_one_column +load_column1: + mov x30, x3j + mov x3j, x31 + rjmp Sbox_one_column +load_column2: + mov x31, x3j + mov x3j, x32 + rjmp Sbox_one_column +load_column3: + mov x32, x3j + mov x3j, x33 + rjmp Sbox_one_column +load_column4: + mov x33, x3j + mov x3j, x34 + rjmp Sbox_one_column +load_column5: + mov x34, x3j + mov x3j, x35 + rjmp Sbox_one_column +load_column6: + mov x35, x3j + mov x3j, x36 + rjmp Sbox_one_column +load_column7: + mov x36, x3j + mov x3j, x37 + rjmp Sbox_one_column +load_column8: + mov x37, x3j + mov x3j, x38 + rjmp Sbox_one_column +load_column9: + mov x38, x3j + mov x3j, x39 + rjmp Sbox_one_column +load_columna: + mov x39, x3j + mov x3j, x3a + rjmp Sbox_one_column +load_columnb: + mov x3a, x3j + mov x3j, x3b + rjmp Sbox_one_column +load_columnc: + mov x3b, x3j + mov x3j, x3c + rjmp Sbox_one_column +load_columnd: + mov x3c, x3j + mov x3j, x3d + rjmp Sbox_one_column +load_columne: + mov x3d, x3j + mov x3j, x3e + rjmp Sbox_one_column +load_columnf: + mov x3e, x3j + mov x3j, x3f + rjmp Sbox_one_column + +#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) +LFSR_table: + rjmp LFSR7 + rjmp LFSR8 +LFSR7: + LFSR7_MACRO + rjmp LFSR_DONE +LFSR8: + LFSR8_MACRO + rjmp LFSR_DONE +#endif + +;;;;;;;;;;;;;;;;;;;;;;;; Real Start +AddRC_SubColumns_Start: + ldi YH, hi8(SRAM_STATE) + ldi YL, lo8(SRAM_STATE) + clr ccnt + ld x0j, Y + eor x0j, rc + +#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) + ldi ZL, pm_lo8(LFSR_table) + ldi ZH, pm_hi8(LFSR_table) + sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[1] = 1 for HASH + adiw ZL, 1 + ijmp +LFSR_DONE: +#elif defined(CRYPTO_AEAD) + LFSR7_MACRO ; only AEAD +#else + LFSR8_MACRO ; only HASH +#endif + + ldd x1j, Y + ROW_INBYTES + ldd x2j, Y + 2 * ROW_INBYTES + ldd t2j, Y + 2 * ROW_INBYTES + 1 + ldi ZL, pm_lo8(load_columns_table) + ldi ZH, pm_hi8(load_columns_table) + ijmp +Sbox_one_column: + Sbox x0j, x1j, x2j, x3j + + ; f e d c b a 9 
8 7 6 5 4 3 2 1 0
+    ; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x- 0
+    ; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x' 0
+    ; -- -- -- -- -- -- -- -- -- -- -- -- -- x- -- -- 2
+    ; -- -- -- -- -- -- -- -- -- -- -- -- x' -- -- -- 3
+    ; c b a 9 8 7 6 5 4 3 2 1 0 f e d
+    ; Store a byte to Row 0
+    st Y, x0j
+    ; Store a byte combined with ShiftRow1
+    lsl t1j                     ; carry = top bit of the previous column's row-1 byte
+    mov t1j, x1j ; back up the last updated byte in t1j, to be used in shiftRow1 (1 bit left)
+    rol x1j
+    std Y + ROW_INBYTES, x1j
+    ; Store a byte combined with ShiftRow2
+    ; Row 2 moves by two bytes here, so two not-yet-processed input bytes are
+    ; kept in flight: x2j (next column) and t2j (the column after it).
+    inc ccnt
+    cpi ccnt, ROW_INBYTES - 1
+    brsh ROW2_WRAP              ; the last two columns wrap to the start of row 2
+    ldd tmp0, Y + 2 * ROW_INBYTES + 2 ; load next byte, the last updated byte needed to be shifted to the address of the next bytes
+    std Y + 2 * ROW_INBYTES + 2, x2j
+    mov x2j, t2j
+    mov t2j, tmp0
+    ; rjmp instead of jmp: the target is a few words away, every other branch
+    ; in this file is relative, and jmp is a two-word instruction that is not
+    ; available on all AVR devices
+    rjmp NO_ROW2_WRAP
+ROW2_WRAP:
+    std Y + ROW_INBYTES + 2, x2j
+    mov x2j, t2j
+
+    ; remain ShiftRow3 to be done at 'amend_shiftRow'
+NO_ROW2_WRAP:
+    adiw YL, 1
+    ld x0j, Y
+    ldd x1j, Y + ROW_INBYTES
+
+    adiw ZL, 1
+    ijmp
+
+amend_shiftRow:
+    ldi YH, hi8(SRAM_STATE + ROW_INBYTES)
+    ldi YL, lo8(SRAM_STATE + ROW_INBYTES)
+
+    ; close the 1-bit rotation of row 1: the top bit of its last byte becomes
+    ; bit 0 of its first byte
+    ld x1j, Y
+    bst t1j, 7
+    bld x1j, 0
+    st Y, x1j
+
+    ; <<< 1
+    mov x3f, x3j
+    rol x3j                     ; only used to move the top bit of x3f into carry
+    rol x30
+    rol x31
+    rol x32
+    rol x33
+    rol x34
+    rol x35
+    rol x36
+    rol x37
+    rol x38
+    rol x39
+    rol x3a
+    rol x3b
+    rol x3c
+    rol x3d
+    rol x3e
+    rol x3f
+    ; <<< 24
+    ; f e d c b a 9 8 7 6 5 4 3 2 1 0 =>
+    ; c b a 9 8 7 6 5 4 3 2 1 0 f e d
+    mov x3j, x30
+    mov x30, x3d
+    mov x3d, x3a
+    mov x3a, x37
+    mov x37, x34
+    mov x34, x31
+    mov x31, x3e
+    mov x3e, x3b
+    mov x3b, x38
+    mov x38, x35
+    mov x35, x32
+    mov x32, x3f
+    mov x3f, x3c
+    mov x3c, x39
+    mov x39, x36
+    mov x36, x33
+    mov x33, x3j
+
+    dec rcnt
+    breq round_loop_end
+    rjmp round_loop_start
+
+round_loop_end:
+
+    ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES)
+    ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES)
+    st Y+, x30
+    st Y+, x31
+    st Y+, x32
+    st Y+, x33
+    st Y+, x34
+    st Y+, x35
+    st Y+, x36
+    st Y+, x37
+    st Y+, x38
+    st Y+, x39
+    st Y+,
x3a + st Y+, x3b + st Y+, x3c + st Y+, x3d + st Y+, x3e + st Y+, x3f + + POP_CONFLICT +ret \ No newline at end of file diff --git a/knot/Implementations/crypto_aead/knot192/avr8_lowrom/permutation.h b/knot/Implementations/crypto_aead/knot192/avr8_lowrom/permutation.h new file mode 100644 index 0000000..a57c5d3 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/avr8_lowrom/permutation.h @@ -0,0 +1,109 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.0 2020 by KNOT Team * +; ********************************************** +; + +; +; ============================================ +; R E G I S T E R D E F I N I T I O N S +; ============================================ +; + +#define mclen r16 +#define radlen r17 +#define tcnt r17 +#define tmp0 r20 +#define tmp1 r21 +#define cnt0 r22 +#define rn r23 +#define rate r24 + +; +; ; AEDH = 0b000: for authenticate AD +; ; AEDH = 0b001: for encryption +; ; AEDH = 0b011: for decryption +; ; AEDH = 0b100: for hash +; #define AEDH r25 ; Register used globally within this program +; +; #define x30 r0 ; Register used without overlapping +; #define x31 r1 ; Register used without overlapping +; #define x32 r2 ; Register used without overlapping +; #define x33 r3 ; Register used without overlapping +; #define x34 r4 ; Register used without overlapping +; #define x35 r5 ; Register used without overlapping +; #define x36 r6 ; Register used without overlapping +; #define x37 r7 ; Register used without overlapping +; #define x38 r8 ; Register used without overlapping +; #define x39 r9 ; Register used without overlapping +; #define x3a r10 ; Register used without overlapping +; #define x3b r11 ; Register used without overlapping +; #define x3c r12 ; Register used without overlapping +; #define x3d r13 ; Register used without overlapping +; 
#define x3e r14 ; Register used without overlapping +; #define x3f r15 ; Register used without overlapping +; +; #define x0j r16 ; Register used overlapped, should be backed up before using +; #define x1j r17 ; Register used overlapped, should be backed up before using +; #define x2j r18 ; Register used overlapped, should be backed up before using +; #define x3j r19 ; Register used overlapped, should be backed up before using +; +; ; t2j used in knot512 to keep one byte in Row2 (because of rotating 16-bit), +; ; will not be interupt with LFSR which uses the overlapped register tmp1 +; #define t2j r21 ; Temporary register, used freely +; #define t1j r22 ; Temporary register, used freely +; #define t3j r23 ; Temporary register, used freely +; +; #define rc r24 ; Register used overlapped, should be backed up before using +; #define rcnt r26 ; Register used overlapped, should be backed up before using +; #define ccnt r27 ; Register used overlapped, should be backed up before using + +#define AEDH r25 +#define x30 r0 +#define x31 r1 +#define x32 r2 +#define x33 r3 +#define x34 r4 +#define x35 r5 +#define x36 r6 +#define x37 r7 +#define x38 r8 +#define x39 r9 +#define x3a r10 +#define x3b r11 +#define x3c r12 +#define x3d r13 +#define x3e r14 +#define x3f r15 + +#define x0j r16 +#define x1j r17 +#define x2j r18 +#define x3j r19 + +; t2j used in knot512 to keep one byte in Row2 (because of rotating 16-bit), +; will not be interupt with LFSR which uses the overlapped register tmp1 +#define t2j r21 +#define t1j r22 +#define t3j r23 + +#define rc r24 +#define rcnt r26 +#define ccnt r27 + +#if (STATE_INBITS==256) +#include "knot256.h" +#elif (STATE_INBITS==384) +#include "knot384.h" +#elif (STATE_INBITS==512) +#include "knot512.h" +#else +#error "Not specified key size and state size" +#endif + + diff --git a/knot/Implementations/crypto_aead/knot256/avr8_lowrom/api.h b/knot/Implementations/crypto_aead/knot256/avr8_lowrom/api.h new file mode 100644 index 0000000..5c0f032 --- 
/dev/null +++ b/knot/Implementations/crypto_aead/knot256/avr8_lowrom/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 32 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 32 +#define CRYPTO_ABYTES 32 +#define CRYPTO_NOOVERLAP 1 diff --git a/knot/Implementations/crypto_aead/knot256/avr8_lowrom/assist.h b/knot/Implementations/crypto_aead/knot256/avr8_lowrom/assist.h new file mode 100644 index 0000000..cb903a5 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot256/avr8_lowrom/assist.h @@ -0,0 +1,140 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.0 2020 by KNOT Team * +; ********************************************** +; +.macro LFSR6_MACRO + bst rc, 5 + bld tmp0, 0 + bst rc, 4 + bld tmp1, 0 + eor tmp0, tmp1 + ror tmp0 + rol rc + andi rc, 0x3F +.endm + +.macro LFSR7_MACRO + bst rc, 6 + bld tmp0, 0 + bst rc, 5 + bld tmp1, 0 + eor tmp0, tmp1 + ror tmp0 + rol rc + andi rc, 0x7F +.endm + +.macro LFSR8_MACRO + bst rc, 7 + bld tmp0, 0 + bst rc, 5 + bld tmp1, 0 + eor tmp0, tmp1 + bst rc, 4 + bld tmp1, 0 + eor tmp0, tmp1 + bst rc, 3 + bld tmp1, 0 + eor tmp0, tmp1 + ror tmp0 + rol rc +.endm + +.macro Sbox i0, i1, i2, i3 + mov tmp0, \i1 + com \i0 + and \i1, \i0 + eor \i1, \i2 + or \i2, tmp0 + eor \i0, \i3 + eor \i2, \i0 + eor tmp0, \i3 + and \i0, \i1 + eor \i3, \i1 + eor \i0, tmp0 + and tmp0, \i2 + eor \i1, tmp0 +.endm + +.macro PUSH_CONFLICT + push r16 + push r17 + push r18 + push r19 + + push r23 + push r24 + + push r26 + push r27 + push r28 + push r29 + push r30 + push r31 +.endm + +.macro POP_CONFLICT + pop r31 + pop r30 + pop r29 + pop r28 + pop r27 + pop r26 + + pop r24 + pop r23 + + pop r19 + pop r18 + pop r17 + pop r16 +.endm + +.macro PUSH_ALL + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + 
push r14 + push r15 + push r16 + push r17 + push r28 + push r29 +.endm + +.macro POP_ALL + pop r29 + pop r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + clr r1 +.endm \ No newline at end of file diff --git a/knot/Implementations/crypto_aead/knot256/avr8_lowrom/config.h b/knot/Implementations/crypto_aead/knot256/avr8_lowrom/config.h new file mode 100644 index 0000000..8fb6034 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot256/avr8_lowrom/config.h @@ -0,0 +1,131 @@ +#ifndef __CONFIG_H__ +#define __CONFIG_H__ + +#define CRYPTO_AEAD +//#define CRYPTO_HASH + +#define MAX_MESSAGE_LENGTH 128 + +#define STATE_INBITS 512 +/* For CRYPTO_AEAD */ +#define CRYPTO_KEYBITS 256 +/* For CRYPTO_HASH */ +#define CRYPTO_BITS 512 + +#define STATE_INBYTES ((STATE_INBITS + 7) / 8) +#define ROW_INBITS ((STATE_INBITS + 3) / 4) +#define ROW_INBYTES ((ROW_INBITS + 7) / 8) + +/* For CRYPTO_AEAD */ +#define CRYPTO_KEYBYTES ((CRYPTO_KEYBITS + 7) / 8) +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES CRYPTO_KEYBYTES +#define CRYPTO_ABYTES CRYPTO_KEYBYTES +#define CRYPTO_NOOVERLAP 1 + +#define MAX_ASSOCIATED_DATA_LENGTH 32 +#define MAX_CIPHER_LENGTH (MAX_MESSAGE_LENGTH + CRYPTO_ABYTES) + +#define TAG_MATCH 0 +#define TAG_UNMATCH -1 +#define OTHER_FAILURES -2 + +/* For CRYPTO_HASH */ +#define CRYPTO_BYTES ((CRYPTO_BITS + 7) / 8) + + + +#define DOMAIN_BITS 0x80 +#define PAD_BITS 0x01 +#define S384_R192_BITS 0x80 + +#if (STATE_INBITS==256) +#define C1 1 +#define C2 8 +#define C3 25 +#elif (STATE_INBITS==384) +#define C1 1 +#define C2 8 +#define C3 55 +#elif (STATE_INBITS==512) +#define C1 1 +#define C2 16 +#define C3 25 +#else +#error "Not specified state size" +#endif + +#ifdef CRYPTO_AEAD +/* For CRYPTO_AEAD */ +#define KEY_INBITS (CRYPTO_KEYBYTES * 8) +#define KEY_INBYTES (CRYPTO_KEYBYTES) + +#define NONCE_INBITS (CRYPTO_NPUBBYTES * 8) +#define NONCE_INBYTES 
(CRYPTO_NPUBBYTES) + +#define TAG_INBITS (CRYPTO_ABYTES * 8) +#define TAG_INBYTES (CRYPTO_ABYTES) + +#if (KEY_INBITS==128) && (STATE_INBITS==256) +#define RATE_INBITS 64 +#define NR_0 52 +#define NR_i 28 +#define NR_f 32 +#elif (KEY_INBITS==128) && (STATE_INBITS==384) +#define RATE_INBITS 192 +#define NR_0 76 +#define NR_i 28 +#define NR_f 32 +#elif (KEY_INBITS==192) && (STATE_INBITS==384) +#define RATE_INBITS 96 +#define NR_0 76 +#define NR_i 40 +#define NR_f 44 +#elif (KEY_INBITS==256) && (STATE_INBITS==512) +#define RATE_INBITS 128 +#define NR_0 100 +#define NR_i 52 +#define NR_f 56 +#else +#error "Not specified key size and state size" +#endif + +#define RATE_INBYTES ((RATE_INBITS + 7) / 8) +#define SQUEEZE_RATE_INBYTES TAG_INBYTES + +#endif + +#ifdef CRYPTO_HASH +/* For CRYPTO_HASH */ +#define HASH_DIGEST_INBITS (CRYPTO_BYTES * 8) + +#if (HASH_DIGEST_INBITS==256) && (STATE_INBITS==256) +#define HASH_RATE_INBITS 32 +#define HASH_SQUEEZE_RATE_INBITS 128 +#define NR_h 68 +#elif (HASH_DIGEST_INBITS==256) && (STATE_INBITS==384) +#define HASH_RATE_INBITS 128 +#define HASH_SQUEEZE_RATE_INBITS 128 +#define NR_h 80 +#elif (HASH_DIGEST_INBITS==384) && (STATE_INBITS==384) +#define HASH_RATE_INBITS 48 +#define HASH_SQUEEZE_RATE_INBITS 192 +#define NR_h 104 +#elif (HASH_DIGEST_INBITS==512) && (STATE_INBITS==512) +#define HASH_RATE_INBITS 64 +#define HASH_SQUEEZE_RATE_INBITS 256 +#define NR_h 140 +#else +#error "Not specified hash digest size and state size" +#endif + +#define HASH_RATE_INBYTES ((HASH_RATE_INBITS + 7) / 8) +#define HASH_SQUEEZE_RATE_INBYTES ((HASH_SQUEEZE_RATE_INBITS + 7) / 8) + +#endif + +#define TAG_MATCH 0 +#define TAG_UNMATCH -1 +#define OTHER_FAILURES -2 + +#endif \ No newline at end of file diff --git a/knot/Implementations/crypto_aead/knot256/avr8_lowrom/crypto_aead.h b/knot/Implementations/crypto_aead/knot256/avr8_lowrom/crypto_aead.h new file mode 100644 index 0000000..cd820d3 --- /dev/null +++ 
b/knot/Implementations/crypto_aead/knot256/avr8_lowrom/crypto_aead.h @@ -0,0 +1,26 @@ +#ifdef __cplusplus +extern "C" { +#endif + +int crypto_aead_encrypt( + unsigned char *c,unsigned long long *clen, + const unsigned char *m,unsigned long long mlen, + const unsigned char *ad,unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k + ); + + +int crypto_aead_decrypt( + unsigned char *m,unsigned long long *outputmlen, + unsigned char *nsec, + const unsigned char *c,unsigned long long clen, + const unsigned char *ad,unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k + ); + +#ifdef __cplusplus +} +#endif diff --git a/knot/Implementations/crypto_aead/knot256/avr8_lowrom/encrypt.c b/knot/Implementations/crypto_aead/knot256/avr8_lowrom/encrypt.c new file mode 100644 index 0000000..baf0a3b --- /dev/null +++ b/knot/Implementations/crypto_aead/knot256/avr8_lowrom/encrypt.c @@ -0,0 +1,106 @@ +#include +#include +#include +#include +#include "config.h" + +extern void crypto_aead_encrypt_asm( + unsigned char *c, + const unsigned char *m, + unsigned char mlen, + const unsigned char *ad, + unsigned char adlen, + const unsigned char *npub, + const unsigned char *k + ); + +extern int crypto_aead_decrypt_asm( + unsigned char *m, + const unsigned char *c, + unsigned char clen, + const unsigned char *ad, + unsigned char adlen, + const unsigned char *npub, + const unsigned char *k + ); + +extern void crypto_hash_asm( + unsigned char *out, + const unsigned char *in, + unsigned char inlen + ); + + +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k + ) +{ + /* + ... + ... the code for the cipher implementation goes here, + ... generating a ciphertext c[0],c[1],...,c[*clen-1] + ... 
from a plaintext m[0],m[1],...,m[mlen-1] + ... and associated data ad[0],ad[1],...,ad[adlen-1] + ... and nonce npub[0],npub[1],.. + ... and secret key k[0],k[1],... + ... the implementation shall not use nsec + ... + ... return 0; + */ + + (void)nsec; + + crypto_aead_encrypt_asm(c, m, mlen, ad, adlen, npub, k); + + *clen = mlen + TAG_INBYTES; + return 0; +} + + + +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k + ) +{ + /* + ... + ... the code for the AEAD implementation goes here, + ... generating a plaintext m[0],m[1],...,m[*mlen-1] + ... and secret message number nsec[0],nsec[1],... + ... from a ciphertext c[0],c[1],...,c[clen-1] + ... and associated data ad[0],ad[1],...,ad[adlen-1] + ... and nonce number npub[0],npub[1],... + ... and secret key k[0],k[1],... + ... + ... return 0; + */ + unsigned long long mlen_; + unsigned char tag_is_match; + + (void)nsec; + if (clen < CRYPTO_ABYTES) { + return -1; + } + mlen_ = clen - CRYPTO_ABYTES; + + tag_is_match = crypto_aead_decrypt_asm(m, c, mlen_, ad, adlen, npub, k); + + if (tag_is_match != 0) + { + memset(m, 0, (size_t)mlen_); + return -1; + } + + *mlen = mlen_; + return 0; +} \ No newline at end of file diff --git a/knot/Implementations/crypto_aead/knot256/avr8_lowrom/encrypt_core.S b/knot/Implementations/crypto_aead/knot256/avr8_lowrom/encrypt_core.S new file mode 100644 index 0000000..cb7aed5 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot256/avr8_lowrom/encrypt_core.S @@ -0,0 +1,537 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.0 2020 by KNOT Team * +; ********************************************** +; + +; +; 
; XOR_to_State: absorb one full block of `rate` bytes into the state.
;
; Inputs:
;   YH:YL  address of the current associated data / message / ciphertext block
;   ZH:ZL  address of the output block; only written when AEDH[0] = 1
;   rate   number of bytes to process
;   AEDH   mode flags:
;            bit 0 set -> store the XOR result through Z (enc/dec)
;            bit 1 set -> write the input byte back into the state (dec)
; Effects:
;   The first `rate` bytes of SRAM_STATE are updated. Y (and Z, when it is
;   written) advance by `rate`. X, tmp0, tmp1 and cnt0 are overwritten.
XOR_to_State:
    ldi  XH, hi8(SRAM_STATE)
    ldi  XL, lo8(SRAM_STATE)
    mov  cnt0, rate
XOR_to_State_loop:
    ld   tmp0, Y+              ; input byte: AD, plaintext or ciphertext
    ld   tmp1, X               ; current state byte
    eor  tmp1, tmp0            ; state XOR input = ciphertext (enc) / plaintext (dec)
    sbrc AEDH, 0               ; AEDH[0] == 0 (auth only): skip the store of the result
    st   Z+, tmp1              ; enc/dec: emit ciphertext/plaintext byte
    sbrc AEDH, 1               ; AEDH[1] == 0 (auth/enc): skip the replacement of the state byte
    mov  tmp1, tmp0            ; dec: the new state byte is the ciphertext byte itself
    st   X+, tmp1              ; write the state byte back
    dec  cnt0
    brne XOR_to_State_loop
; YH:YL now hold the address of the next input block
ret
; AUTH: absorb `radlen` bytes, read through YH:YL, into the state.
;
; Every complete block of `rate` bytes goes through XOR_to_State followed by
; Permutation. The remaining radlen (< rate, possibly 0) bytes go through
; Pad_XOR_to_State followed by Permutation. When radlen is 0 on entry the
; routine returns without touching the state.
;
; The caller sets rn, rate and AEDH before the call; radlen is consumed.
AUTH:
    tst   radlen
    breq  AUTH_end                  ; nothing to absorb

auth_ad_loop:
    cp    radlen, rate
    brlo  auth_ad_padded_block      ; fewer than `rate` bytes left: final block
    rcall XOR_to_State              ; absorb one complete block
    rcall Permutation
    sub   radlen, rate
    rjmp  auth_ad_loop

auth_ad_padded_block:
    mov   cnt0, radlen              ; 0 <= cnt0 < rate, as Pad_XOR_to_State requires
    rcall Pad_XOR_to_State
    rcall Permutation

AUTH_end:
ret
; ENC: encrypt or decrypt `mclen` bytes from YH:YL to ZH:ZL.
;
; Every complete block of `rate` bytes goes through XOR_to_State followed by
; an NR_i-round Permutation. The remaining mclen (< rate, possibly 0) bytes
; go through Pad_XOR_to_State, and no permutation is run afterwards.
; When mclen is 0 on entry the routine returns without touching the state.
;
; The caller sets rate and AEDH (0b001 to encrypt, 0b011 to decrypt) before
; the call; mclen is consumed.
ENC:
    tst   mclen
    breq  ENC_end                   ; empty message

enc_loop:
    cp    mclen, rate
    brlo  enc_padded_block          ; fewer than `rate` bytes left: final block
    rcall XOR_to_State              ; process one complete block
    ldi   rn, NR_i
    rcall Permutation
    sub   mclen, rate
    rjmp  enc_loop

enc_padded_block:
    mov   cnt0, mclen               ; 0 <= cnt0 < rate, as Pad_XOR_to_State requires
    rcall Pad_XOR_to_State
ENC_end:
ret
; int crypto_aead_decrypt_asm(
;     unsigned char *m,
;     const unsigned char *c,
;     unsigned char clen,
;     const unsigned char *ad,
;     unsigned char adlen,
;     const unsigned char *npub,
;     const unsigned char *k
; )
;
; unsigned char *m,           is passed in r24:r25
; const unsigned char *c,     is passed in r22:r23
; unsigned char clen,         is passed in r20 (length of the ciphertext
;                             without the tag; r21 is stored but never used)
; const unsigned char *ad,    is passed in r18:r19
; unsigned char adlen,        is passed in r16 (r17 is stored but never used)
; const unsigned char *npub,  is passed in r14:r15
; const unsigned char *k      is passed in r12:r13
;
; Writes clen plaintext bytes to m, recomputes the tag into SRAM_ADDITIONAL
; and compares it with the CRYPTO_ABYTES bytes that follow the ciphertext.
; Returns 0x0000 in r25:r24 when the tags are equal and 0xFFFF otherwise.
; The comparison always reads every tag byte and contains no data-dependent
; branch, so its running time does not reveal the position of a mismatch.
.global crypto_aead_decrypt_asm
crypto_aead_decrypt_asm:
    PUSH_ALL
    ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
    ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
    st X+, r25 ;store message address in SRAM_MESSAGE_OUT_ADDR
    st X+, r24
    st X+, r23 ;store cipher address in SRAM_MESSAGE_IN_ADDR
    st X+, r22
    st X+, r21 ;store cipher length in SRAM_MESSAGE_IN_LEN
    st X+, r20
    st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR
    st X+, r18
    st X+, r17 ;store associated data length in SRAM_ADLEN
    st X+, r16
    st X+, r15 ;store nonce address in SRAM_NONCE_ADDR
    st X+, r14
    st X+, r13 ;store key address in SRAM_KEY_ADDR
    st X+, r12
    mov radlen, r16                 ; radlen is r17: copy before mclen overwrites r16
    mov mclen, r20

    rcall Initialization

    ldi rn, NR_i
    ldi rate, RATE_INBYTES
    ldi AEDH, 0b000 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
    lds YH, SRAM_ASSOCIATED_DATA_ADDR
    lds YL, SRAM_ASSOCIATED_DATA_ADDR + 1
    rcall AUTH
    rcall AddDomain
    ldi AEDH, 0b011 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
    lds YH, SRAM_MESSAGE_IN_ADDR
    lds YL, SRAM_MESSAGE_IN_ADDR + 1
    lds ZH, SRAM_MESSAGE_OUT_ADDR
    lds ZL, SRAM_MESSAGE_OUT_ADDR + 1
    rcall ENC
    ; YH:YL now point just past the ciphertext, i.e. at the received tag

    ldi ZH, hi8(SRAM_ADDITIONAL)
    ldi ZL, lo8(SRAM_ADDITIONAL)
    rcall Finalization              ; computed tag -> SRAM_ADDITIONAL, Z advanced past it

    sbiw ZL, CRYPTO_ABYTES          ; rewind Z to the start of the computed tag
    ldi cnt0, CRYPTO_ABYTES
    clr r24                         ; r24 accumulates the OR of all byte differences
compare_tag:
    ld tmp0, Z+                     ; computed tag byte
    ld tmp1, Y+                     ; received tag byte
    eor tmp0, tmp1                  ; zero only when the two bytes are equal
    or r24, tmp0
    dec cnt0
    brne compare_tag

    ; Branch-free reduction of the accumulator to the return value:
    ; NEG sets the carry flag exactly when its operand was nonzero, and
    ; SBC r24, r24 then yields 0x00 (carry clear) or 0xFF (carry set).
    neg r24
    sbc r24, r24
    mov r25, r24

crypto_aead_decrypt_end:
    POP_ALL
ret
(STATE_INBITS==384) && (HASH_RATE_INBITS==128) + ldi tmp0, S384_R192_BITS + st X+, tmp0 +#endif + + ldi rn, NR_h + ldi AEDH, 0b100 + +HASH_ABSORBING: + mov radlen, mclen + tst radlen + breq EMPTY_M + ldi rate, HASH_RATE_INBYTES + lds YH, SRAM_MESSAGE_IN_ADDR + lds YL, SRAM_MESSAGE_IN_ADDR + 1 + rcall AUTH + rjmp HASH_SQUEEZING + +EMPTY_M: + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + ldi tmp0, PAD_BITS + ld tmp1, X + eor tmp1, tmp0 + st X, tmp1 + rcall Permutation + +HASH_SQUEEZING: + ldi rate, HASH_SQUEEZE_RATE_INBYTES + lds ZH, SRAM_MESSAGE_OUT_ADDR + lds ZL, SRAM_MESSAGE_OUT_ADDR + 1 + ldi tcnt, CRYPTO_BYTES +SQUEEZING_loop: + rcall EXTRACT_from_State + subi tcnt, HASH_SQUEEZE_RATE_INBYTES + breq HASH_SQUEEZING_end + rcall Permutation + rjmp SQUEEZING_loop +HASH_SQUEEZING_end: + POP_ALL +ret + +#endif + + +; Byte Order In AVR 8: +; KNOT-AEAD(128, 256, 64): +; N[ 0] AEAD_State[ 0] | Message[ 0] Perm_row_0[0] 0 Tag[ 0] +; N[ 1] AEAD_State[ 1] | Message[ 1] Perm_row_0[1] 0 Tag[ 1] +; N[ 2] AEAD_State[ 2] | Message[ 2] Perm_row_0[2] 0 Tag[ 2] +; N[ 3] AEAD_State[ 3] | Message[ 3] Perm_row_0[3] 0 Tag[ 3] +; N[ 4] AEAD_State[ 4] | Message[ 4] 0x01 Perm_row_0[4] 0 Tag[ 4] +; N[ 5] AEAD_State[ 5] | Message[ 5] 0x00 Perm_row_0[5] 0 Tag[ 5] +; N[ 6] AEAD_State[ 6] | Message[ 6] 0x00 Perm_row_0[6] 0 Tag[ 6] +; N[ 7] AEAD_State[ 7] | Message[ 7] 0x00 Perm_row_0[7] <<< 0 Tag[ 7] +; N[ 8] AEAD_State[ 8] | Perm_row_1[0] 1 +; N[ 9] AEAD_State[ 9] | Perm_row_1[1] 1 +; N[10] AEAD_State[10] | Perm_row_1[2] 1 +; N[11] AEAD_State[11] | Perm_row_1[3] 1 +; N[12] AEAD_State[12] | Perm_row_1[4] 1 +; N[13] AEAD_State[13] | Perm_row_1[5] 1 +; N[14] AEAD_State[14] | Perm_row_1[6] 1 +; N[15] AEAD_State[15] | Perm_row_1[7] <<< 1 +; K[ 0] AEAD_State[16] | Perm_row_2[0] 8 +; K[ 1] AEAD_State[17] | Perm_row_2[1] 8 +; K[ 2] AEAD_State[18] | Perm_row_2[2] 8 +; K[ 3] AEAD_State[19] | Perm_row_2[3] 8 +; K[ 4] AEAD_State[20] | Perm_row_2[4] 8 +; K[ 5] AEAD_State[21] | Perm_row_2[5] 8 +; K[ 
6] AEAD_State[22] | Perm_row_2[6] 8 +; K[ 7] AEAD_State[23] | Perm_row_2[7] <<< 8 +; K[ 8] AEAD_State[24] | Perm_row_3[0] 25 +; K[ 9] AEAD_State[25] | Perm_row_3[1] 25 +; K[10] AEAD_State[26] | Perm_row_3[2] 25 +; K[11] AEAD_State[27] | Perm_row_3[3] 25 +; K[12] AEAD_State[28] | Perm_row_3[4] 25 +; K[13] AEAD_State[29] | Perm_row_3[5] 25 +; K[14] AEAD_State[30] | Perm_row_3[6] 25 +; K[15] AEAD_State[31] | ^0x80 Perm_row_3[7] <<< 25 +; +; +; KNOT-AEAD(128, 384, 192): +; Initalization +; N[ 0] AEAD_State[ 0] | Message[ 0] Perm_row_0[ 0] 0 Tag[ 0] +; N[ 1] AEAD_State[ 1] | Message[ 1] Perm_row_0[ 1] 0 Tag[ 1] +; N[ 2] AEAD_State[ 2] | Message[ 2] Perm_row_0[ 2] 0 Tag[ 2] +; N[ 3] AEAD_State[ 3] | Message[ 3] Perm_row_0[ 3] 0 Tag[ 3] +; N[ 4] AEAD_State[ 4] | Message[ 4] 0x01 Perm_row_0[ 4] 0 Tag[ 4] +; N[ 5] AEAD_State[ 5] | Message[ 5] 0x00 Perm_row_0[ 5] 0 Tag[ 5] +; N[ 6] AEAD_State[ 6] | Message[ 6] 0x00 Perm_row_0[ 6] 0 Tag[ 6] +; N[ 7] AEAD_State[ 7] | Message[ 7] 0x00 Perm_row_0[ 7] 0 Tag[ 7] +; N[ 8] AEAD_State[ 8] | Message[ 8] 0x00 Perm_row_0[ 8] 0 Tag[ 8] +; N[ 9] AEAD_State[ 9] | Message[ 9] 0x00 Perm_row_0[ 9] 0 Tag[ 9] +; N[10] AEAD_State[10] | Message[10] 0x00 Perm_row_0[10] 0 Tag[10] +; N[11] AEAD_State[11] | Message[11] 0x00 Perm_row_0[11] <<< 0 Tag[11] +; N[12] AEAD_State[12] | Message[12] 0x00 Perm_row_1[ 0] 1 Tag[12] +; N[13] AEAD_State[13] | Message[13] 0x00 Perm_row_1[ 1] 1 Tag[13] +; N[14] AEAD_State[14] | Message[14] 0x00 Perm_row_1[ 2] 1 Tag[14] +; N[15] AEAD_State[15] | Message[15] 0x00 Perm_row_1[ 3] 1 Tag[15] +; K[ 0] AEAD_State[16] | Message[16] 0x00 Perm_row_1[ 4] 1 +; K[ 1] AEAD_State[17] | Message[17] 0x00 Perm_row_1[ 5] 1 +; K[ 2] AEAD_State[18] | Message[18] 0x00 Perm_row_1[ 6] 1 +; K[ 3] AEAD_State[19] | Message[19] 0x00 Perm_row_1[ 7] 1 +; K[ 4] AEAD_State[20] | Message[20] 0x00 Perm_row_1[ 8] 1 +; K[ 5] AEAD_State[21] | Message[21] 0x00 Perm_row_1[ 9] 1 +; K[ 6] AEAD_State[22] | Message[22] 0x00 Perm_row_1[10] 1 +; K[ 7] 
; Permutation (256-bit state, ROW_INBYTES = 8): run `rn` rounds over SRAM_STATE.
;
; Row 3 is kept in registers x30..x37 for the whole call; rows 0..2 are
; processed in SRAM one byte column at a time. Each round:
;   - XORs the round constant rc into byte 0 of row 0 and steps rc with the
;     LFSR selected at build time (or by AEDH[2] when both modes are built),
;   - applies Sbox to every byte column,
;   - rotates row 1 left by 1 bit and row 2 left by 8 bits while storing,
;   - rotates row 3 left by 25 bits (1 bit, then 3 bytes) in registers.
; rc restarts at 0x01 on every call.
;
; PUSH_CONFLICT / POP_CONFLICT preserve r16-r19, r23, r24 and r26-r31.
; x30..x37 (r0-r7), tmp0, tmp1/t2j and t1j are overwritten.
Permutation:
    PUSH_CONFLICT
    mov rcnt, rn

    ldi rc, 0x01
    ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES)
    ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES)
    ld x30, Y+
    ld x31, Y+
    ld x32, Y+
    ld x33, Y+
    ld x34, Y+
    ld x35, Y+
    ld x36, Y+
    ld x37, Y+

round_loop_start:
    rjmp AddRC_SubColumns_Start

; Jump table indexed through Z: entry k prepares column k, the last entry
; leaves the column loop.
load_columns_table:
    rjmp load_column0
    rjmp load_column1
    rjmp load_column2
    rjmp load_column3
    rjmp load_column4
    rjmp load_column5
    rjmp load_column6
    rjmp load_column7
    rjmp amend_shiftRow

; load_columnK: write the previous column result back to its row 3 register
; and fetch the row 3 byte of column K into x3j.
load_column0:
    mov x3j, x30
    rjmp Sbox_one_column
load_column1:
    mov x30, x3j
    mov x3j, x31
    rjmp Sbox_one_column
load_column2:
    mov x31, x3j
    mov x3j, x32
    rjmp Sbox_one_column
load_column3:
    mov x32, x3j
    mov x3j, x33
    rjmp Sbox_one_column
load_column4:
    mov x33, x3j
    mov x3j, x34
    rjmp Sbox_one_column
load_column5:
    mov x34, x3j
    mov x3j, x35
    rjmp Sbox_one_column
load_column6:
    mov x35, x3j
    mov x3j, x36
    rjmp Sbox_one_column
load_column7:
    mov x36, x3j
    mov x3j, x37
    rjmp Sbox_one_column

#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
LFSR_table:
    rjmp LFSR6
    rjmp LFSR7
LFSR6:
    LFSR6_MACRO
    rjmp LFSR_DONE
LFSR7:
    LFSR7_MACRO
    rjmp LFSR_DONE
#endif

;;;;;;;;;;;;;;;;;;;;;;;; Real Start
AddRC_SubColumns_Start:
    ldi YH, hi8(SRAM_STATE)
    ldi YL, lo8(SRAM_STATE)
    clr ccnt
    ld x0j, Y
    eor x0j, rc

#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
    ldi ZL, pm_lo8(LFSR_table)
    ldi ZH, pm_hi8(LFSR_table)
    sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[2] = 1 for HASH
    adiw ZL, 1
    ijmp
LFSR_DONE:
#elif defined(CRYPTO_AEAD)
    LFSR6_MACRO ; only AEAD
#else
    LFSR7_MACRO ; only HASH
#endif

    ldd x1j, Y + ROW_INBYTES
    ldd x2j, Y + 2 * ROW_INBYTES
    ldi ZL, pm_lo8(load_columns_table)
    ldi ZH, pm_hi8(load_columns_table)
    ijmp
Sbox_one_column:
    Sbox x0j, x1j, x2j, x3j

    ; 7 6 5 4 3 2 1 0
    ; -- -- -- -- -- -- -- x- 0
    ; -- -- -- -- -- -- -- x' 0
    ; -- -- -- -- -- -- x- -- 1
    ; -- -- -- -- x' -- -- -- 3
    ; 4 3 2 1 0 7 6 5
    ; Store a byte to Row 0
    st Y, x0j
    ; Store a byte combined with ShiftRow1
    lsl t1j                 ; carry = bit 7 of the previous row 1 byte
    mov t1j, x1j ; back up the last updated byte in t1j, to be used in shiftRow1 (1 bit left)
    rol x1j
    std Y + ROW_INBYTES, x1j
    ; Store a byte combined with ShiftRow2
    inc ccnt
    cpi ccnt, ROW_INBYTES
    breq ROW2_WRAP
    ldd t2j, Y + 2 * ROW_INBYTES + 1 ; load next byte, the last updated byte needed to be shifted to the address of the next bytes
    std Y + 2 * ROW_INBYTES + 1, x2j
    mov x2j, t2j
    rjmp NO_ROW2_WRAP       ; short hop: rjmp is one word and exists on every AVR core
ROW2_WRAP:
    std Y + ROW_INBYTES + 1, x2j
    ; remain ShiftRow3 to be done at 'amend_shiftRow'
NO_ROW2_WRAP:
    adiw YL, 1
    ld x0j, Y
    ldd x1j, Y + ROW_INBYTES

    adiw ZL, 1
    ijmp

amend_shiftRow:
    ldi YH, hi8(SRAM_STATE + ROW_INBYTES)
    ldi YL, lo8(SRAM_STATE + ROW_INBYTES)

    ; bit 0 of row 1 byte 0 must receive bit 7 of the last row 1 byte
    ld x1j, Y
    bst t1j, 7
    bld x1j, 0
    st Y, x1j

    ; <<< 1
    mov x37, x3j
    rol x3j
    rol x30
    rol x31
    rol x32
    rol x33
    rol x34
    rol x35
    rol x36
    rol x37
    ; <<< 24
    ; 7 6 5 4 3 2 1 0 => 4 3 2 1 0 7 6 5
    mov x3j, x30
    mov x30, x35
    mov x35, x32
    mov x32, x37
    mov x37, x34
    mov x34, x31
    mov x31, x36
    mov x36, x33
    mov x33, x3j

    dec rcnt
    breq round_loop_end
    rjmp round_loop_start

round_loop_end:
    ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES)
    ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES)
    st Y+, x30
    st Y+, x31
    st Y+, x32
    st Y+, x33
    st Y+, x34
    st Y+, x35
    st Y+, x36
    st Y+, x37

    POP_CONFLICT
ret
ROW_INBYTES) + ld x30, Y+ + ld x31, Y+ + ld x32, Y+ + ld x33, Y+ + ld x34, Y+ + ld x35, Y+ + ld x36, Y+ + ld x37, Y+ + ld x38, Y+ + ld x39, Y+ + ld x3a, Y+ + ld x3b, Y+ + +round_loop_start: + rjmp AddRC_SubColumns_Start + +load_columns_table: + rjmp load_column0 + rjmp load_column1 + rjmp load_column2 + rjmp load_column3 + rjmp load_column4 + rjmp load_column5 + rjmp load_column6 + rjmp load_column7 + rjmp load_column8 + rjmp load_column9 + rjmp load_columna + rjmp load_columnb + rjmp amend_shiftRow + +load_column0: + mov x3j, x30 + rjmp Sbox_one_column +load_column1: + mov x30, x3j + mov x3j, x31 + rjmp Sbox_one_column +load_column2: + mov x31, x3j + mov x3j, x32 + rjmp Sbox_one_column +load_column3: + mov x32, x3j + mov x3j, x33 + rjmp Sbox_one_column +load_column4: + mov x33, x3j + mov x3j, x34 + rjmp Sbox_one_column +load_column5: + mov x34, x3j + mov x3j, x35 + rjmp Sbox_one_column +load_column6: + mov x35, x3j + mov x3j, x36 + rjmp Sbox_one_column +load_column7: + mov x36, x3j + mov x3j, x37 + rjmp Sbox_one_column +load_column8: + mov x37, x3j + mov x3j, x38 + rjmp Sbox_one_column +load_column9: + mov x38, x3j + mov x3j, x39 + rjmp Sbox_one_column +load_columna: + mov x39, x3j + mov x3j, x3a + rjmp Sbox_one_column +load_columnb: + mov x3a, x3j + mov x3j, x3b + rjmp Sbox_one_column + +;;;;;;;;;;;;;;;;;;;;;;;; Real Start +AddRC_SubColumns_Start: + ldi YH, hi8(SRAM_STATE) + ldi YL, lo8(SRAM_STATE) + ldi ZL, pm_lo8(load_columns_table) + ldi ZH, pm_hi8(load_columns_table) + clr ccnt + ld x0j, Y + eor x0j, rc + LFSR7_MACRO + + ldd x1j, Y + ROW_INBYTES + ldd x2j, Y + 2 * ROW_INBYTES + ijmp +Sbox_one_column: + Sbox x0j, x1j, x2j, x3j + + ; b a 9 8 7 6 5 4 3 2 1 0 + ; -- -- -- -- -- -- -- -- -- -- -- x- 0 + ; -- -- -- -- -- -- -- -- -- -- -- x' 0 + ; -- -- -- -- -- -- -- -- -- -- x- -- 1 + ; -- -- -- -- x' -- -- -- -- -- -- -- 7 + ; 4 3 2 1 0 b a 9 8 7 6 5 + ; Store a byte to Row 0 + st Y, x0j + ; Store a byte combined with ShiftRow 1 + lsl t1j + mov t1j, x1j ; back 
up the last updated byte in t1j, to be used in shiftRow1 (1 bit left) + rol x1j + std Y + ROW_INBYTES, x1j + ; Store a byte combined with ShiftRow 2 + inc ccnt + cpi ccnt, ROW_INBYTES + breq ROW2_WRAP + ldd t2j, Y + 2 * ROW_INBYTES + 1 ; load next byte, the last updated byte needed to be shifted to the address of the next bytes + std Y + 2 * ROW_INBYTES + 1, x2j + mov x2j, t2j + jmp NO_ROW2_WRAP +ROW2_WRAP: + std Y + ROW_INBYTES + 1, x2j + ; remain ShiftRow3 to be done at 'amend_shiftRow' +NO_ROW2_WRAP: + adiw YL, 1 + ld x0j, Y + ldd x1j, Y + ROW_INBYTES + + adiw ZL, 1 + ijmp + +amend_shiftRow: + ldi YH, hi8(SRAM_STATE + ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + ROW_INBYTES) + + ld x1j, Y + bst t1j, 7 + bld x1j, 0 + st Y, x1j + + ; >>> 1 + mov x3b, x3j + ror x3j + ror x3a + ror x39 + ror x38 + ror x37 + ror x36 + ror x35 + ror x34 + ror x33 + ror x32 + ror x31 + ror x30 + ror x3b + ; <<< 56 + ; b a 9 8 7 6 5 4 3 2 1 0 => 4 3 2 1 0 b a 9 8 7 6 5 + ;mov x3j, x30 + ;mov x30, x35 + ;mov x35, x32 + ;mov x32, x37 + ;mov x37, x34 + ;mov x34, x31 + ;mov x31, x36 + ;mov x36, x33 + ;mov x33, x3j + mov x3j, x30 + mov x30, x35 + mov x35, x3a + mov x3a, x33 + mov x33, x38 + mov x38, x31 + mov x31, x36 + mov x36, x3b + mov x3b, x34 + mov x34, x39 + mov x39, x32 + mov x32, x37 + mov x37, x3j + + dec rcnt + breq round_loop_end + rjmp round_loop_start + +round_loop_end: + + ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES) + st Y+, x30 + st Y+, x31 + st Y+, x32 + st Y+, x33 + st Y+, x34 + st Y+, x35 + st Y+, x36 + st Y+, x37 + st Y+, x38 + st Y+, x39 + st Y+, x3a + st Y+, x3b + + POP_CONFLICT +ret \ No newline at end of file diff --git a/knot/Implementations/crypto_aead/knot256/avr8_lowrom/knot512.h b/knot/Implementations/crypto_aead/knot256/avr8_lowrom/knot512.h new file mode 100644 index 0000000..d24b353 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot256/avr8_lowrom/knot512.h @@ -0,0 +1,275 @@ +; +; 
; Permutation (512-bit state, ROW_INBYTES = 16): run `rn` rounds over SRAM_STATE.
;
; Row 3 is kept in registers x30..x3f for the whole call; rows 0..2 are
; processed in SRAM one byte column at a time. Each round:
;   - XORs the round constant rc into byte 0 of row 0 and steps rc with the
;     LFSR selected at build time (or by AEDH[2] when both modes are built),
;   - applies Sbox to every byte column,
;   - rotates row 1 left by 1 bit and row 2 left by 16 bits while storing
;     (t2j carries the second pending row 2 byte),
;   - rotates row 3 left by 25 bits (1 bit, then 3 bytes) in registers.
; rc restarts at 0x01 on every call.
;
; PUSH_CONFLICT / POP_CONFLICT preserve r16-r19, r23, r24 and r26-r31.
; x30..x3f (r0-r15), tmp0, tmp1/t2j and t1j are overwritten.
Permutation:
    PUSH_CONFLICT
    mov rcnt, rn

    ldi rc, 0x01
    ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES)
    ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES)
    ld x30, Y+
    ld x31, Y+
    ld x32, Y+
    ld x33, Y+
    ld x34, Y+
    ld x35, Y+
    ld x36, Y+
    ld x37, Y+
    ld x38, Y+
    ld x39, Y+
    ld x3a, Y+
    ld x3b, Y+
    ld x3c, Y+
    ld x3d, Y+
    ld x3e, Y+
    ld x3f, Y+

round_loop_start:
    rjmp AddRC_SubColumns_Start

; Jump table indexed through Z: entry k prepares column k, the last entry
; leaves the column loop.
load_columns_table:
    rjmp load_column0
    rjmp load_column1
    rjmp load_column2
    rjmp load_column3
    rjmp load_column4
    rjmp load_column5
    rjmp load_column6
    rjmp load_column7
    rjmp load_column8
    rjmp load_column9
    rjmp load_columna
    rjmp load_columnb
    rjmp load_columnc
    rjmp load_columnd
    rjmp load_columne
    rjmp load_columnf
    rjmp amend_shiftRow

; load_columnK: write the previous column result back to its row 3 register
; and fetch the row 3 byte of column K into x3j.
load_column0:
    mov x3j, x30
    rjmp Sbox_one_column
load_column1:
    mov x30, x3j
    mov x3j, x31
    rjmp Sbox_one_column
load_column2:
    mov x31, x3j
    mov x3j, x32
    rjmp Sbox_one_column
load_column3:
    mov x32, x3j
    mov x3j, x33
    rjmp Sbox_one_column
load_column4:
    mov x33, x3j
    mov x3j, x34
    rjmp Sbox_one_column
load_column5:
    mov x34, x3j
    mov x3j, x35
    rjmp Sbox_one_column
load_column6:
    mov x35, x3j
    mov x3j, x36
    rjmp Sbox_one_column
load_column7:
    mov x36, x3j
    mov x3j, x37
    rjmp Sbox_one_column
load_column8:
    mov x37, x3j
    mov x3j, x38
    rjmp Sbox_one_column
load_column9:
    mov x38, x3j
    mov x3j, x39
    rjmp Sbox_one_column
load_columna:
    mov x39, x3j
    mov x3j, x3a
    rjmp Sbox_one_column
load_columnb:
    mov x3a, x3j
    mov x3j, x3b
    rjmp Sbox_one_column
load_columnc:
    mov x3b, x3j
    mov x3j, x3c
    rjmp Sbox_one_column
load_columnd:
    mov x3c, x3j
    mov x3j, x3d
    rjmp Sbox_one_column
load_columne:
    mov x3d, x3j
    mov x3j, x3e
    rjmp Sbox_one_column
load_columnf:
    mov x3e, x3j
    mov x3j, x3f
    rjmp Sbox_one_column

#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
LFSR_table:
    rjmp LFSR7
    rjmp LFSR8
LFSR7:
    LFSR7_MACRO
    rjmp LFSR_DONE
LFSR8:
    LFSR8_MACRO
    rjmp LFSR_DONE
#endif

;;;;;;;;;;;;;;;;;;;;;;;; Real Start
AddRC_SubColumns_Start:
    ldi YH, hi8(SRAM_STATE)
    ldi YL, lo8(SRAM_STATE)
    clr ccnt
    ld x0j, Y
    eor x0j, rc

#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
    ldi ZL, pm_lo8(LFSR_table)
    ldi ZH, pm_hi8(LFSR_table)
    sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[2] = 1 for HASH
    adiw ZL, 1
    ijmp
LFSR_DONE:
#elif defined(CRYPTO_AEAD)
    LFSR7_MACRO ; only AEAD
#else
    LFSR8_MACRO ; only HASH
#endif

    ldd x1j, Y + ROW_INBYTES
    ldd x2j, Y + 2 * ROW_INBYTES
    ldd t2j, Y + 2 * ROW_INBYTES + 1   ; loaded after the LFSR, which uses the same register as tmp1
    ldi ZL, pm_lo8(load_columns_table)
    ldi ZH, pm_hi8(load_columns_table)
    ijmp
Sbox_one_column:
    Sbox x0j, x1j, x2j, x3j

    ; f e d c b a 9 8 7 6 5 4 3 2 1 0
    ; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x- 0
    ; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x' 0
    ; -- -- -- -- -- -- -- -- -- -- -- -- -- x- -- -- 2
    ; -- -- -- -- -- -- -- -- -- -- -- -- x' -- -- -- 3
    ; c b a 9 8 7 6 5 4 3 2 1 0 f e d
    ; Store a byte to Row 0
    st Y, x0j
    ; Store a byte combined with ShiftRow1
    lsl t1j                 ; carry = bit 7 of the previous row 1 byte
    mov t1j, x1j ; back up the last updated byte in t1j, to be used in shiftRow1 (1 bit left)
    rol x1j
    std Y + ROW_INBYTES, x1j
    ; Store a byte combined with ShiftRow2
    inc ccnt
    cpi ccnt, ROW_INBYTES - 1
    brsh ROW2_WRAP
    ldd tmp0, Y + 2 * ROW_INBYTES + 2 ; load next byte, the last updated byte needed to be shifted to the address of the next bytes
    std Y + 2 * ROW_INBYTES + 2, x2j
    mov x2j, t2j
    mov t2j, tmp0
    rjmp NO_ROW2_WRAP       ; short hop: rjmp is one word and exists on every AVR core
ROW2_WRAP:
    std Y + ROW_INBYTES + 2, x2j
    mov x2j, t2j

    ; remain ShiftRow3 to be done at 'amend_shiftRow'
NO_ROW2_WRAP:
    adiw YL, 1
    ld x0j, Y
    ldd x1j, Y + ROW_INBYTES

    adiw ZL, 1
    ijmp

amend_shiftRow:
    ldi YH, hi8(SRAM_STATE + ROW_INBYTES)
    ldi YL, lo8(SRAM_STATE + ROW_INBYTES)

    ; bit 0 of row 1 byte 0 must receive bit 7 of the last row 1 byte
    ld x1j, Y
    bst t1j, 7
    bld x1j, 0
    st Y, x1j

    ; <<< 1
    mov x3f, x3j
    rol x3j
    rol x30
    rol x31
    rol x32
    rol x33
    rol x34
    rol x35
    rol x36
    rol x37
    rol x38
    rol x39
    rol x3a
    rol x3b
    rol x3c
    rol x3d
    rol x3e
    rol x3f
    ; <<< 24
    ; f e d c b a 9 8 7 6 5 4 3 2 1 0 =>
    ; c b a 9 8 7 6 5 4 3 2 1 0 f e d
    mov x3j, x30
    mov x30, x3d
    mov x3d, x3a
    mov x3a, x37
    mov x37, x34
    mov x34, x31
    mov x31, x3e
    mov x3e, x3b
    mov x3b, x38
    mov x38, x35
    mov x35, x32
    mov x32, x3f
    mov x3f, x3c
    mov x3c, x39
    mov x39, x36
    mov x36, x33
    mov x33, x3j

    dec rcnt
    breq round_loop_end
    rjmp round_loop_start

round_loop_end:

    ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES)
    ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES)
    st Y+, x30
    st Y+, x31
    st Y+, x32
    st Y+, x33
    st Y+, x34
    st Y+, x35
    st Y+, x36
    st Y+, x37
    st Y+, x38
    st Y+, x39
    st Y+, x3a
    st Y+, x3b
    st Y+, x3c
    st Y+, x3d
    st Y+, x3e
    st Y+, x3f

    POP_CONFLICT
ret
;
; **********************************************
; * KNOT: a family of bit-slice lightweight    *
; *       authenticated encryption algorithms  *
; *       and hash functions                   *
; *                                            *
; * Assembly implementation for 8-bit AVR CPU  *
; * Version 1.0 2020 by KNOT Team              *
; **********************************************
;

;
; ============================================
;   R E G I S T E R   D E F I N I T I O N S
; ============================================
;
; This header gives symbolic names to the AVR registers used by the mode
; code and by the permutation, then includes the permutation source that
; matches STATE_INBITS.
;
; The annotations are kept in comment lines instead of trailing the
; register name on each #define line: any text after the name on a
; #define line would become part of the macro replacement.
;

; Registers used by the mode code. radlen and tcnt share r17.
#define mclen   r16
#define radlen  r17
#define tcnt    r17
#define tmp0    r20
#define tmp1    r21
#define cnt0    r22
#define rn      r23
#define rate    r24

;
; AEDH (r25) is used globally within this program and selects the operation:
;   AEDH = 0b000: authenticate AD
;   AEDH = 0b001: encryption
;   AEDH = 0b011: decryption
;   AEDH = 0b100: hash
;
; x30 .. x3f (r0 .. r15):
;   registers used without overlapping.
; x0j .. x3j (r16 .. r19), rc (r24), rcnt (r26), ccnt (r27):
;   registers that overlap with other uses and should be backed up
;   before using.
; t1j (r22), t2j (r21), t3j (r23):
;   temporary registers, used freely.
;

#define AEDH    r25
#define x30     r0
#define x31     r1
#define x32     r2
#define x33     r3
#define x34     r4
#define x35     r5
#define x36     r6
#define x37     r7
#define x38     r8
#define x39     r9
#define x3a     r10
#define x3b     r11
#define x3c     r12
#define x3d     r13
#define x3e     r14
#define x3f     r15

#define x0j     r16
#define x1j     r17
#define x2j     r18
#define x3j     r19

; t2j is used in knot512 to keep one byte of Row2 (because of the 16-bit
; rotation). It shares r21 with tmp1, which the LFSR macros use, but the
; two uses do not interfere with each other.
#define t2j     r21
#define t1j     r22
#define t3j     r23

#define rc      r24
#define rcnt    r26
#define ccnt    r27

; Select the permutation implementation for the configured state size.
#if (STATE_INBITS==256)
#include "knot256.h"
#elif (STATE_INBITS==384)
#include "knot384.h"
#elif (STATE_INBITS==512)
#include "knot512.h"
#else
#error "Not specified key size and state size"
#endif
;
; **********************************************
; * KNOT: a family of bit-slice lightweight    *
; *       authenticated encryption algorithms  *
; *       and hash functions                   *
; *                                            *
; * Assembly implementation for 8-bit AVR CPU  *
; * Version 1.0 2020 by KNOT Team              *
; **********************************************
;

; Step the 6-bit round-constant LFSR held in rc:
;   rc <- ((rc << 1) | (rc[5] ^ rc[4])) & 0x3F
; Clobbers tmp0, tmp1, the T flag and the carry.
.macro LFSR6_MACRO
    bst   rc, 5
    bld   tmp0, 0
    bst   rc, 4
    bld   tmp1, 0
    eor   tmp0, tmp1        ; bit 0 of tmp0 = feedback bit
    ror   tmp0              ; feedback bit -> carry
    rol   rc                ; shift left, carry enters at bit 0
    andi  rc, 0x3F          ; keep 6 bits
.endm

; Step the 7-bit round-constant LFSR held in rc:
;   rc <- ((rc << 1) | (rc[6] ^ rc[5])) & 0x7F
; Clobbers tmp0, tmp1, the T flag and the carry.
.macro LFSR7_MACRO
    bst   rc, 6
    bld   tmp0, 0
    bst   rc, 5
    bld   tmp1, 0
    eor   tmp0, tmp1        ; bit 0 of tmp0 = feedback bit
    ror   tmp0              ; feedback bit -> carry
    rol   rc                ; shift left, carry enters at bit 0
    andi  rc, 0x7F          ; keep 7 bits
.endm

; Step the 8-bit round-constant LFSR held in rc:
;   rc <- (rc << 1) | (rc[7] ^ rc[5] ^ rc[4] ^ rc[3])
; No mask is needed because rc is exactly 8 bits wide.
; Clobbers tmp0, tmp1, the T flag and the carry.
.macro LFSR8_MACRO
    bst   rc, 7
    bld   tmp0, 0
    bst   rc, 5
    bld   tmp1, 0
    eor   tmp0, tmp1
    bst   rc, 4
    bld   tmp1, 0
    eor   tmp0, tmp1
    bst   rc, 3
    bld   tmp1, 0
    eor   tmp0, tmp1        ; bit 0 of tmp0 = feedback bit
    ror   tmp0              ; feedback bit -> carry
    rol   rc                ; shift left, carry enters at bit 0
.endm

; Bit-sliced S-box. i0..i3 each hold one byte of rows 0..3, so every bit
; position is an independent column and eight columns are substituted at
; once. The result replaces i0..i3 in place; tmp0 is used as scratch.
; The instruction order is significant, do not reorder.
.macro Sbox i0, i1, i2, i3
    mov   tmp0, \i1
    com   \i0
    and   \i1,  \i0
    eor   \i1,  \i2
    or    \i2,  tmp0
    eor   \i0,  \i3
    eor   \i2,  \i0
    eor   tmp0, \i3
    and   \i0,  \i1
    eor   \i3,  \i1
    eor   \i0,  tmp0
    and   tmp0, \i2
    eor   \i1,  tmp0
.endm

; Save the registers that the permutation reuses while its caller still
; needs them: r16..r19 (x0j..x3j), r23, r24 (rc), r26/r27 (rcnt/ccnt, X),
; r28/r29 (Y) and r30/r31 (Z). Must be paired with POP_CONFLICT.
.macro PUSH_CONFLICT
    push r16
    push r17
    push r18
    push r19

    push r23
    push r24

    push r26
    push r27
    push r28
    push r29
    push r30
    push r31
.endm

; Restore the registers saved by PUSH_CONFLICT, in reverse order.
.macro POP_CONFLICT
    pop r31
    pop r30
    pop r29
    pop r28
    pop r27
    pop r26

    pop r24
    pop r23

    pop r19
    pop r18
    pop r17
    pop r16
.endm

; Entry sequence of the functions called from C: save r2..r17 and
; r28/r29, the registers a function called from avr-gcc code must
; preserve. Must be paired with POP_ALL.
.macro PUSH_ALL
    push r2
    push r3
    push r4
    push r5
    push r6
    push r7
    push r8
    push r9
    push r10
    push r11
    push r12
    push r13
    push r14
    push r15
    push r16
    push r17
    push r28
    push r29
.endm

; Exit sequence matching PUSH_ALL. r1 is cleared as well: it is used as
; x31 by the permutation, and avr-gcc expects r1 to hold zero.
.macro POP_ALL
    pop r29
    pop r28
    pop r17
    pop r16
    pop r15
    pop r14
    pop r13
    pop r12
    pop r11
    pop r10
    pop r9
    pop r8
    pop r7
    pop r6
    pop r5
    pop r4
    pop r3
    pop r2
    clr r1
.endm
#ifndef __CONFIG_H__
#define __CONFIG_H__

/*
 * Build configuration shared by the C wrappers and the assembly core
 * (this header is also included from encrypt_core.S, so it must contain
 * preprocessor directives and comments only).
 *
 * Exactly one of CRYPTO_AEAD / CRYPTO_HASH selects which entry points
 * the assembly core provides; this copy is configured for hashing.
 */

//#define CRYPTO_AEAD
#define CRYPTO_HASH

#define MAX_MESSAGE_LENGTH			128

#define STATE_INBITS				256
/* For CRYPTO_AEAD */
#define CRYPTO_KEYBITS				128
/* For CRYPTO_HASH */
#define CRYPTO_BITS					256

/* The state is four rows of ROW_INBITS bits each. */
#define STATE_INBYTES				((STATE_INBITS + 7) / 8)
#define ROW_INBITS					((STATE_INBITS + 3) / 4)
#define ROW_INBYTES					((ROW_INBITS + 7) / 8)

/* For CRYPTO_AEAD */
#define CRYPTO_KEYBYTES				((CRYPTO_KEYBITS + 7) / 8)
#define CRYPTO_NSECBYTES			0
#define CRYPTO_NPUBBYTES			CRYPTO_KEYBYTES
#define CRYPTO_ABYTES				CRYPTO_KEYBYTES
#define CRYPTO_NOOVERLAP			1

#define MAX_ASSOCIATED_DATA_LENGTH	32
#define MAX_CIPHER_LENGTH			(MAX_MESSAGE_LENGTH + CRYPTO_ABYTES)

/*
 * Status codes. Defined once (the original header repeated this group at
 * the end of the file); negative values are parenthesized so they expand
 * safely inside larger expressions.
 */
#define TAG_MATCH					0
#define TAG_UNMATCH					(-1)
#define OTHER_FAILURES				(-2)

/* For CRYPTO_HASH */
#define CRYPTO_BYTES				((CRYPTO_BITS + 7) / 8)

/* Bytes XORed into the state: domain separation, padding, 384/192 marker. */
#define DOMAIN_BITS					0x80
#define PAD_BITS					0x01
#define S384_R192_BITS				0x80

/* Row rotation amounts (in bits) for rows 1, 2 and 3. */
#if   (STATE_INBITS==256)
#define C1 1
#define C2 8
#define C3 25
#elif (STATE_INBITS==384)
#define C1 1
#define C2 8
#define C3 55
#elif (STATE_INBITS==512)
#define C1 1
#define C2 16
#define C3 25
#else
#error "Not specified state size"
#endif

#ifdef CRYPTO_AEAD
/* For CRYPTO_AEAD */
#define KEY_INBITS					(CRYPTO_KEYBYTES * 8)
#define KEY_INBYTES					(CRYPTO_KEYBYTES)

#define NONCE_INBITS				(CRYPTO_NPUBBYTES * 8)
#define NONCE_INBYTES				(CRYPTO_NPUBBYTES)

#define TAG_INBITS					(CRYPTO_ABYTES * 8)
#define TAG_INBYTES					(CRYPTO_ABYTES)

/* Rate and round counts (initialization / intermediate / finalization). */
#if   (KEY_INBITS==128) && (STATE_INBITS==256)
#define RATE_INBITS					64
#define NR_0						52
#define NR_i						28
#define NR_f						32
#elif (KEY_INBITS==128) && (STATE_INBITS==384)
#define RATE_INBITS					192
#define NR_0						76
#define NR_i						28
#define NR_f						32
#elif (KEY_INBITS==192) && (STATE_INBITS==384)
#define RATE_INBITS					96
#define NR_0						76
#define NR_i						40
#define NR_f						44
#elif (KEY_INBITS==256) && (STATE_INBITS==512)
#define RATE_INBITS					128
#define NR_0						100
#define NR_i						52
#define NR_f						56
#else
#error "Not specified key size and state size"
#endif

#define RATE_INBYTES				((RATE_INBITS + 7) / 8)
#define SQUEEZE_RATE_INBYTES		TAG_INBYTES

#endif

#ifdef CRYPTO_HASH
/* For CRYPTO_HASH */
#define HASH_DIGEST_INBITS			(CRYPTO_BYTES * 8)

/* Absorb rate, squeeze rate and round count of the hash mode. */
#if   (HASH_DIGEST_INBITS==256) && (STATE_INBITS==256)
#define HASH_RATE_INBITS			32
#define HASH_SQUEEZE_RATE_INBITS	128
#define NR_h						68
#elif (HASH_DIGEST_INBITS==256) && (STATE_INBITS==384)
#define HASH_RATE_INBITS			128
#define HASH_SQUEEZE_RATE_INBITS	128
#define NR_h						80
#elif (HASH_DIGEST_INBITS==384) && (STATE_INBITS==384)
#define HASH_RATE_INBITS			48
#define HASH_SQUEEZE_RATE_INBITS	192
#define NR_h						104
#elif (HASH_DIGEST_INBITS==512) && (STATE_INBITS==512)
#define HASH_RATE_INBITS			64
#define HASH_SQUEEZE_RATE_INBITS	256
#define NR_h						140
#else
#error "Not specified hash digest size and state size"
#endif

#define HASH_RATE_INBYTES			((HASH_RATE_INBITS + 7) / 8)
#define HASH_SQUEEZE_RATE_INBYTES	((HASH_SQUEEZE_RATE_INBITS + 7) / 8)

#endif

#endif
/*
 * crypto_hash.h - public prototype of the KNOT hash function.
 *
 * The include guard makes the header safe to include more than once.
 */
#ifndef CRYPTO_HASH_H
#define CRYPTO_HASH_H

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Hash in[0..inlen-1] into out[0..CRYPTO_BYTES-1].
 * Returns 0 on success.
 */
int crypto_hash(
	unsigned char *out,
	const unsigned char *in,
	unsigned long long inlen
	);

#ifdef __cplusplus
}
#endif

#endif /* CRYPTO_HASH_H */
); + +extern void crypto_hash_asm( + unsigned char *out, + const unsigned char *in, + unsigned char inlen + ); + + +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k + ) +{ + /* + ... + ... the code for the cipher implementation goes here, + ... generating a ciphertext c[0],c[1],...,c[*clen-1] + ... from a plaintext m[0],m[1],...,m[mlen-1] + ... and associated data ad[0],ad[1],...,ad[adlen-1] + ... and nonce npub[0],npub[1],.. + ... and secret key k[0],k[1],... + ... the implementation shall not use nsec + ... + ... return 0; + */ + + (void)nsec; + + crypto_aead_encrypt_asm(c, m, mlen, ad, adlen, npub, k); + + *clen = mlen + TAG_INBYTES; + return 0; +} + + + +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k + ) +{ + /* + ... + ... the code for the AEAD implementation goes here, + ... generating a plaintext m[0],m[1],...,m[*mlen-1] + ... and secret message number nsec[0],nsec[1],... + ... from a ciphertext c[0],c[1],...,c[clen-1] + ... and associated data ad[0],ad[1],...,ad[adlen-1] + ... and nonce number npub[0],npub[1],... + ... and secret key k[0],k[1],... + ... + ... 
return 0; + */ + unsigned long long mlen_; + unsigned char tag_is_match; + + (void)nsec; + if (clen < CRYPTO_ABYTES) { + return -1; + } + mlen_ = clen - CRYPTO_ABYTES; + + tag_is_match = crypto_aead_decrypt_asm(m, c, mlen_, ad, adlen, npub, k); + + if (tag_is_match != 0) + { + memset(m, 0, (size_t)mlen_); + return -1; + } + + *mlen = mlen_; + return 0; +} \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot256v1/avr8_lowrom/encrypt_core.S b/knot/Implementations/crypto_hash/knot256v1/avr8_lowrom/encrypt_core.S new file mode 100644 index 0000000..cb7aed5 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v1/avr8_lowrom/encrypt_core.S @@ -0,0 +1,537 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.0 2020 by KNOT Team * +; ********************************************** +; + +; +; ============================================ +; S R A M D E F I N I T I O N S +; ============================================ +; +#include +#include "config.h" + +.section .noinit + SRAM_STATE: .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#if (STATE_INBYTES > 32) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif +#if (STATE_INBYTES > 48) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif + SRAM_MESSAGE_OUT_ADDR: .BYTE 0, 0 + SRAM_MESSAGE_IN_ADDR: .BYTE 0, 0 + SRAM_MESSAGE_IN_LEN: .BYTE 0, 0 +#ifdef CRYPTO_AEAD +; For CRYPTO_AEAD + SRAM_ASSOCIATED_DATA_ADDR: .BYTE 0, 0 + SRAM_ADLEN: .BYTE 0, 0 + SRAM_NONCE_ADDR: .BYTE 0, 0 + SRAM_KEY_ADDR: .BYTE 0, 0 + + SRAM_ADDITIONAL: + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#if (CRYPTO_ABYTES > 16) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif +#if (CRYPTO_ABYTES > 24) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 
; XOR_to_State: absorb one full block of `rate` bytes into the state.
;
; require YH:YL be the address of the current associated data/cipher/message block
; require ZH:ZL be the address where the resulting ciphertext/plaintext
;         block is stored (only written for enc and dec, i.e. AEDH[0] = 1)
; require rate hold the block length in bytes
;
; AEDH[0] = 0: authenticate only, nothing is stored through Z
; AEDH[0] = 1: store (state XOR input) through Z
; AEDH[1] = 1: decryption, the state byte is replaced by the input byte
;              instead of by the XOR result
;
; Clobbers X, cnt0, tmp0 and tmp1; Y (and Z when used) are advanced by
; `rate` bytes.
XOR_to_State:
    ldi  XH, hi8(SRAM_STATE)
    ldi  XL, lo8(SRAM_STATE)
    mov  cnt0, rate
XOR_to_State_loop:
    ld   tmp0, Y+           ; plaintext/ciphertext
    ld   tmp1, X            ; state
    eor  tmp1, tmp0         ; ciphertext/plaintext
    sbrc AEDH, 0            ; test auth or enc/dec, if AEDH[0] == 0, skip storing the result
    st   Z+, tmp1           ; store ciphertext/plaintext
    sbrc AEDH, 1            ; test auth/enc or dec, if AEDH[1] == 0, skip replacing the state byte
    mov  tmp1, tmp0         ; if dec, replace state
    st   X+, tmp1           ; store state byte
    dec  cnt0
    brne XOR_to_State_loop
; YH:YL now hold the address of the next input block
ret

; Pad_XOR_to_State: absorb the final partial block and add the padding.
;
; require YH:YL point to the input data
; require ZH:ZL point to the output data
; require cnt0 contain the number of bytes in the source data
; require the number of bytes in the source data to be less than rate,
;         i.e., 0 <= cnt0 < rate
;
; bit 0 of AEDH distinguishes (auth AD) from (enc/dec M/C):
;     AEDH[0] = 0 for (auth AD), AEDH[0] = 1 for (enc/dec M/C)
; bit 1 of AEDH distinguishes (auth AD/enc M) from (dec C):
;     AEDH[1] = 0 for (auth AD/enc M), AEDH[1] = 1 for (dec C)
; AEDH = 0b000 for (auth AD)
; AEDH = 0b001 for (enc M)
; AEDH = 0b011 for (dec C)
;
; After the cnt0 data bytes, PAD_BITS is XORed into the next state byte.
; Clobbers X, cnt0, tmp0 and tmp1.
Pad_XOR_to_State:
    ldi  XH, hi8(SRAM_STATE)
    ldi  XL, lo8(SRAM_STATE)
    tst  cnt0
    breq XOR_padded_data    ; empty block: only the padding is added
XOR_source_data_loop:
    ld   tmp0, Y+           ; plaintext/ciphertext
    ld   tmp1, X            ; state
    eor  tmp1, tmp0         ; ciphertext/plaintext
    sbrc AEDH, 0            ; test auth or enc/dec, if AEDH[0] == 0, skip storing the result
    st   Z+, tmp1           ; store ciphertext/plaintext
    sbrc AEDH, 1            ; test auth/enc or dec, if AEDH[1] == 0, skip replacing the state byte
    mov  tmp1, tmp0         ; if dec, replace state
    st   X+, tmp1           ; store state byte
    dec  cnt0
    brne XOR_source_data_loop
XOR_padded_data:
    ldi  tmp0, PAD_BITS     ; X points at the first state byte after the data
    ld   tmp1, X
    eor  tmp1, tmp0
    st   X, tmp1
ret
; AddDomain: XOR DOMAIN_BITS into the last byte of the state
; (separates associated-data processing from message processing).
; Clobbers X, tmp0 and tmp1.
AddDomain:
    ldi  XH, hi8(SRAM_STATE + STATE_INBYTES - 1)
    ldi  XL, lo8(SRAM_STATE + STATE_INBYTES - 1)
    ldi  tmp0, DOMAIN_BITS
    ld   tmp1, X
    eor  tmp0, tmp1
    st   X, tmp0
ret

; EXTRACT_from_State: copy the first `rate` bytes of the state to Z.
; require ZH:ZL be the address of the destination
; require rate > 0 (the loop tests the counter after the first copy)
; Clobbers X, tmp0 and tmp1; Z is advanced by `rate` bytes.
EXTRACT_from_State:
    ldi  XH, hi8(SRAM_STATE)
    ldi  XL, lo8(SRAM_STATE)
    mov  tmp1, rate
EXTRACT_from_State_loop:
    ld   tmp0, X+
    st   Z+, tmp0
    dec  tmp1
    brne EXTRACT_from_State_loop
ret

; AUTH: absorb radlen bytes starting at Y, `rate` bytes per block, running
; the permutation (rn rounds) after every block.
; Nothing is done when radlen == 0. Otherwise every full block is absorbed
; with XOR_to_State and the remaining 0 .. rate-1 bytes are absorbed with
; Pad_XOR_to_State, so a padded block is always processed last.
; The hash code reuses this routine to absorb the message.
; require rn, rate, AEDH and YH:YL to be set by the caller
AUTH:
    tst  radlen
    breq AUTH_end

    cp   radlen, rate
    brlo auth_ad_padded_block

auth_ad_loop:
    rcall XOR_to_State
    rcall Permutation
    sub  radlen, rate
    cp   radlen, rate
    brlo auth_ad_padded_block
    rjmp auth_ad_loop

auth_ad_padded_block:
    mov  cnt0, radlen       ; 0 <= radlen < rate here
    rcall Pad_XOR_to_State
    rcall Permutation

AUTH_end:
ret

; The routines below exist only in AEAD builds. The matching #endif
; follows crypto_aead_decrypt_asm.
#ifdef CRYPTO_AEAD
; Initialization: load nonce then key into the state and run NR_0 rounds.
; The nonce and key addresses are read from SRAM_NONCE_ADDR and
; SRAM_KEY_ADDR (stored high byte first by the entry points).
; Leaves rn = NR_0; clobbers X, Y, cnt0 and tmp0.
Initialization:
    ldi  rn, NR_0
    ldi  XL, lo8(SRAM_STATE)
    ldi  XH, hi8(SRAM_STATE)

    lds  YH, SRAM_NONCE_ADDR
    lds  YL, SRAM_NONCE_ADDR + 1
    ldi  cnt0, CRYPTO_NPUBBYTES
load_nonce_loop:
    ld   tmp0, Y+
    st   X+, tmp0
    dec  cnt0
    brne load_nonce_loop

    lds  YH, SRAM_KEY_ADDR
    lds  YL, SRAM_KEY_ADDR + 1
    ldi  cnt0, CRYPTO_KEYBYTES
load_key_loop:
    ld   tmp0, Y+
    st   X+, tmp0
    dec  cnt0
    brne load_key_loop

#if (STATE_INBITS==384) && (RATE_INBITS==192)
    ; nonce and key do not fill the state: zero the remainder and set the
    ; last state byte to S384_R192_BITS
    ldi  cnt0, (STATE_INBYTES - CRYPTO_NPUBBYTES - CRYPTO_KEYBYTES - 1)
    clr  tmp0
empty_state_loop:
    st   X+, tmp0
    dec  cnt0
    brne empty_state_loop
    ldi  tmp0, S384_R192_BITS
    st   X+, tmp0
#endif

    rcall Permutation
ret

; ENC: encrypt or decrypt (selected by AEDH) mclen bytes from Y to Z.
; Nothing is done when mclen == 0. Every full block is followed by NR_i
; permutation rounds; the final padded block is absorbed without a
; permutation (Finalization runs the next one).
; require rate, AEDH, YH:YL and ZH:ZL to be set by the caller
ENC:
    tst  mclen
    breq ENC_end

    cp   mclen, rate
    brlo enc_padded_block

enc_loop:
    rcall XOR_to_State
    ldi  rn, NR_i
    rcall Permutation
    sub  mclen, rate
    cp   mclen, rate
    brlo enc_padded_block
    rjmp enc_loop

enc_padded_block:
    mov  cnt0, mclen        ; 0 <= mclen < rate here
    rcall Pad_XOR_to_State
ENC_end:
ret

; Finalization: run NR_f rounds and write the SQUEEZE_RATE_INBYTES-byte
; tag to Z.
; require ZH:ZL be the address where the tag is stored
; Overwrites rate and rn.
Finalization:
    ldi  rate, SQUEEZE_RATE_INBYTES
    ldi  rn, NR_f
    rcall Permutation
    rcall EXTRACT_from_State
ret
; void crypto_aead_encrypt_asm(
;   unsigned char *c,
;   const unsigned char *m,
;   unsigned char mlen,
;   const unsigned char *ad,
;   unsigned char adlen,
;   const unsigned char *npub,
;   const unsigned char *k
;   )
;
; The lengths are 8-bit values: the C prototype in encrypt.c declares
; them as unsigned char and only r20 and r16 are ever read back.
;
; unsigned char *c,          is passed in r25:r24
; const unsigned char *m,    is passed in r23:r22
; unsigned char mlen,        is passed in r20
; const unsigned char *ad,   is passed in r19:r18
; unsigned char adlen,       is passed in r16
; const unsigned char *npub, is passed in r15:r14
; const unsigned char *k     is passed in r13:r12
;
; Writes mlen ciphertext bytes followed by the tag to c.
.global crypto_aead_encrypt_asm
crypto_aead_encrypt_asm:
    PUSH_ALL
    ; Spill the arguments, high byte first, into the consecutive SRAM
    ; slots that start at SRAM_MESSAGE_OUT_ADDR.
    ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
    ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
    st X+, r25 ;store cipher address in SRAM_MESSAGE_OUT_ADDR
    st X+, r24
    st X+, r23 ;store message address in SRAM_MESSAGE_IN_ADDR
    st X+, r22
    st X+, r21 ;store message length in SRAM_MESSAGE_IN_LEN (r21 is not part of the 8-bit argument)
    st X+, r20
    st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR
    st X+, r18
    st X+, r17 ;store associated data length in SRAM_ADLEN (r17 is not part of the 8-bit argument)
    st X+, r16
    st X+, r15 ;store nonce address in SRAM_NONCE_ADDR
    st X+, r14
    st X+, r13 ;store key address in SRAM_KEY_ADDR
    st X+, r12
    ; Order matters: mclen is r16, so adlen must be copied out of r16
    ; before mlen is copied into it.
    mov radlen, r16
    mov mclen, r20

    rcall Initialization

    ldi rn, NR_i
    ldi rate, RATE_INBYTES
    ldi AEDH, 0b000 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
    lds YH, SRAM_ASSOCIATED_DATA_ADDR
    lds YL, SRAM_ASSOCIATED_DATA_ADDR + 1
    rcall AUTH
    rcall AddDomain
    ldi AEDH, 0b001 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
    lds YH, SRAM_MESSAGE_IN_ADDR
    lds YL, SRAM_MESSAGE_IN_ADDR + 1
    lds ZH, SRAM_MESSAGE_OUT_ADDR
    lds ZL, SRAM_MESSAGE_OUT_ADDR + 1
    rcall ENC
    ; Z now points just past the ciphertext, so the tag is appended there.
    rcall Finalization
    POP_ALL
ret
; int crypto_aead_decrypt_asm(
;   unsigned char *m,
;   const unsigned char *c,
;   unsigned char clen,
;   const unsigned char *ad,
;   unsigned char adlen,
;   const unsigned char *npub,
;   const unsigned char *k
;   )
;
; The lengths are 8-bit values: the C prototype in encrypt.c declares
; them as unsigned char and only r20 and r16 are ever read back. clen is
; the ciphertext length WITHOUT the tag; the tag is read from the
; CRYPTO_ABYTES bytes that follow the ciphertext.
;
; unsigned char *m,          is passed in r25:r24
; const unsigned char *c,    is passed in r23:r22
; unsigned char clen,        is passed in r20
; const unsigned char *ad,   is passed in r19:r18
; unsigned char adlen,       is passed in r16
; const unsigned char *npub, is passed in r15:r14
; const unsigned char *k     is passed in r13:r12
;
; Returns 0 in r25:r24 when the tag matches and -1 (0xFFFF) otherwise.
; The plaintext has already been written to m when the tag is checked, so
; the caller must discard it on a non-zero return.
;
; The tag comparison is constant time: all CRYPTO_ABYTES bytes are always
; compared and the result is formed without a data-dependent branch, so
; the running time does not reveal how many leading tag bytes matched.
.global crypto_aead_decrypt_asm
crypto_aead_decrypt_asm:
    PUSH_ALL
    ; Spill the arguments, high byte first, into the consecutive SRAM
    ; slots that start at SRAM_MESSAGE_OUT_ADDR.
    ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
    ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
    st X+, r25 ;store message address in SRAM_MESSAGE_OUT_ADDR
    st X+, r24
    st X+, r23 ;store cipher address in SRAM_MESSAGE_IN_ADDR
    st X+, r22
    st X+, r21 ;store cipher length in SRAM_MESSAGE_IN_LEN
    st X+, r20
    st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR
    st X+, r18
    st X+, r17 ;store associated data length in SRAM_ADLEN
    st X+, r16
    st X+, r15 ;store nonce address in SRAM_NONCE_ADDR
    st X+, r14
    st X+, r13 ;store key address in SRAM_KEY_ADDR
    st X+, r12
    ; Order matters: mclen is r16, so adlen must be copied out of r16
    ; before clen is copied into it.
    mov radlen, r16
    mov mclen, r20

    rcall Initialization

    ldi rn, NR_i
    ldi rate, RATE_INBYTES
    ldi AEDH, 0b000 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
    lds YH, SRAM_ASSOCIATED_DATA_ADDR
    lds YL, SRAM_ASSOCIATED_DATA_ADDR + 1
    rcall AUTH
    rcall AddDomain
    ldi AEDH, 0b011 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
    lds YH, SRAM_MESSAGE_IN_ADDR
    lds YL, SRAM_MESSAGE_IN_ADDR + 1
    lds ZH, SRAM_MESSAGE_OUT_ADDR
    lds ZL, SRAM_MESSAGE_OUT_ADDR + 1
    rcall ENC
    ; Y now points just past the ciphertext, i.e. at the received tag.

    ; Compute the expected tag into SRAM_ADDITIONAL.
    ldi ZH, hi8(SRAM_ADDITIONAL)
    ldi ZL, lo8(SRAM_ADDITIONAL)
    rcall Finalization

    sbiw ZL, CRYPTO_ABYTES  ; rewind Z to the start of the computed tag
    ldi cnt0, CRYPTO_ABYTES
    clr r24                 ; r24 accumulates the OR of all byte differences
compare_tag:
    ld tmp0, Z+             ; computed tag byte
    ld tmp1, Y+             ; received tag byte
    eor tmp0, tmp1
    or r24, tmp0
    dec cnt0
    brne compare_tag

    ; Branch-free conversion: accumulator == 0 -> 0x0000, otherwise 0xFFFF.
    neg r24                 ; carry is set unless r24 was zero
    sbc r24, r24            ; r24 = 0x00 - carry = 0x00 or 0xFF
    mov r25, r24
crypto_aead_decrypt_end:
    POP_ALL                 ; does not touch r24/r25
ret
; end of the #ifdef CRYPTO_AEAD section
#endif


#ifdef CRYPTO_HASH

; void crypto_hash_asm(
;   unsigned char *out,
;   const unsigned char *in,
;   unsigned char inlen
;   )
;
; inlen is an 8-bit value: the C prototype declares it as unsigned char
; and only r20 is ever read back.
;
; unsigned char *out,        is passed in r25:r24
; const unsigned char *in,   is passed in r23:r22
; unsigned char inlen,       is passed in r20
;
; Writes CRYPTO_BYTES digest bytes to out.
.global crypto_hash_asm
crypto_hash_asm:
    PUSH_ALL
    ; Spill the arguments, high byte first, into the consecutive SRAM
    ; slots that start at SRAM_MESSAGE_OUT_ADDR.
    ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
    ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
    st X+, r25 ;store digest (output) address in SRAM_MESSAGE_OUT_ADDR
    st X+, r24
    st X+, r23 ;store message (input) address in SRAM_MESSAGE_IN_ADDR
    st X+, r22
    st X+, r21 ;store message length in SRAM_MESSAGE_IN_LEN (r21 is not part of the 8-bit argument)
    st X+, r20
    mov mclen, r20

    ; Clear the state. In the 384-bit state / 128-bit rate configuration
    ; the last state byte is set to S384_R192_BITS instead of zero.
    ldi XH, hi8(SRAM_STATE)
    ldi XL, lo8(SRAM_STATE)
#if (STATE_INBITS==384) && (HASH_RATE_INBITS==128)
    ldi cnt0, STATE_INBYTES - 1
#else
    ldi cnt0, STATE_INBYTES
#endif
    clr tmp0
zero_state:
    st X+, tmp0
    dec cnt0
    brne zero_state

#if (STATE_INBITS==384) && (HASH_RATE_INBITS==128)
    ldi tmp0, S384_R192_BITS
    st X+, tmp0
#endif

    ldi rn, NR_h
    ldi AEDH, 0b100         ; hash: bits 0 and 1 are clear, so absorbing stores no output

HASH_ABSORBING:
    mov radlen, mclen
    tst radlen
    breq EMPTY_M
    ldi rate, HASH_RATE_INBYTES
    lds YH, SRAM_MESSAGE_IN_ADDR
    lds YL, SRAM_MESSAGE_IN_ADDR + 1
    rcall AUTH              ; absorbs the message including the final padded block
    rjmp HASH_SQUEEZING

EMPTY_M:
    ; AUTH does nothing for a zero length, so the padding-only block of
    ; the empty message is absorbed here.
    ldi XH, hi8(SRAM_STATE)
    ldi XL, lo8(SRAM_STATE)
    ldi tmp0, PAD_BITS
    ld tmp1, X
    eor tmp1, tmp0
    st X, tmp1
    rcall Permutation

HASH_SQUEEZING:
    ldi rate, HASH_SQUEEZE_RATE_INBYTES
    lds ZH, SRAM_MESSAGE_OUT_ADDR
    lds ZL, SRAM_MESSAGE_OUT_ADDR + 1
    ldi tcnt, CRYPTO_BYTES  ; digest bytes still to be produced
SQUEEZING_loop:
    rcall EXTRACT_from_State
    subi tcnt, HASH_SQUEEZE_RATE_INBYTES
    breq HASH_SQUEEZING_end
    rcall Permutation       ; permute between successive output blocks
    rjmp SQUEEZING_loop
HASH_SQUEEZING_end:
    POP_ALL
ret

#endif
Tag[ 1] +; N[ 2] AEAD_State[ 2] | Message[ 2] Perm_row_0[2] 0 Tag[ 2] +; N[ 3] AEAD_State[ 3] | Message[ 3] Perm_row_0[3] 0 Tag[ 3] +; N[ 4] AEAD_State[ 4] | Message[ 4] 0x01 Perm_row_0[4] 0 Tag[ 4] +; N[ 5] AEAD_State[ 5] | Message[ 5] 0x00 Perm_row_0[5] 0 Tag[ 5] +; N[ 6] AEAD_State[ 6] | Message[ 6] 0x00 Perm_row_0[6] 0 Tag[ 6] +; N[ 7] AEAD_State[ 7] | Message[ 7] 0x00 Perm_row_0[7] <<< 0 Tag[ 7] +; N[ 8] AEAD_State[ 8] | Perm_row_1[0] 1 +; N[ 9] AEAD_State[ 9] | Perm_row_1[1] 1 +; N[10] AEAD_State[10] | Perm_row_1[2] 1 +; N[11] AEAD_State[11] | Perm_row_1[3] 1 +; N[12] AEAD_State[12] | Perm_row_1[4] 1 +; N[13] AEAD_State[13] | Perm_row_1[5] 1 +; N[14] AEAD_State[14] | Perm_row_1[6] 1 +; N[15] AEAD_State[15] | Perm_row_1[7] <<< 1 +; K[ 0] AEAD_State[16] | Perm_row_2[0] 8 +; K[ 1] AEAD_State[17] | Perm_row_2[1] 8 +; K[ 2] AEAD_State[18] | Perm_row_2[2] 8 +; K[ 3] AEAD_State[19] | Perm_row_2[3] 8 +; K[ 4] AEAD_State[20] | Perm_row_2[4] 8 +; K[ 5] AEAD_State[21] | Perm_row_2[5] 8 +; K[ 6] AEAD_State[22] | Perm_row_2[6] 8 +; K[ 7] AEAD_State[23] | Perm_row_2[7] <<< 8 +; K[ 8] AEAD_State[24] | Perm_row_3[0] 25 +; K[ 9] AEAD_State[25] | Perm_row_3[1] 25 +; K[10] AEAD_State[26] | Perm_row_3[2] 25 +; K[11] AEAD_State[27] | Perm_row_3[3] 25 +; K[12] AEAD_State[28] | Perm_row_3[4] 25 +; K[13] AEAD_State[29] | Perm_row_3[5] 25 +; K[14] AEAD_State[30] | Perm_row_3[6] 25 +; K[15] AEAD_State[31] | ^0x80 Perm_row_3[7] <<< 25 +; +; +; KNOT-AEAD(128, 384, 192): +; Initalization +; N[ 0] AEAD_State[ 0] | Message[ 0] Perm_row_0[ 0] 0 Tag[ 0] +; N[ 1] AEAD_State[ 1] | Message[ 1] Perm_row_0[ 1] 0 Tag[ 1] +; N[ 2] AEAD_State[ 2] | Message[ 2] Perm_row_0[ 2] 0 Tag[ 2] +; N[ 3] AEAD_State[ 3] | Message[ 3] Perm_row_0[ 3] 0 Tag[ 3] +; N[ 4] AEAD_State[ 4] | Message[ 4] 0x01 Perm_row_0[ 4] 0 Tag[ 4] +; N[ 5] AEAD_State[ 5] | Message[ 5] 0x00 Perm_row_0[ 5] 0 Tag[ 5] +; N[ 6] AEAD_State[ 6] | Message[ 6] 0x00 Perm_row_0[ 6] 0 Tag[ 6] +; N[ 7] AEAD_State[ 7] | Message[ 7] 0x00 
Perm_row_0[ 7] 0 Tag[ 7] +; N[ 8] AEAD_State[ 8] | Message[ 8] 0x00 Perm_row_0[ 8] 0 Tag[ 8] +; N[ 9] AEAD_State[ 9] | Message[ 9] 0x00 Perm_row_0[ 9] 0 Tag[ 9] +; N[10] AEAD_State[10] | Message[10] 0x00 Perm_row_0[10] 0 Tag[10] +; N[11] AEAD_State[11] | Message[11] 0x00 Perm_row_0[11] <<< 0 Tag[11] +; N[12] AEAD_State[12] | Message[12] 0x00 Perm_row_1[ 0] 1 Tag[12] +; N[13] AEAD_State[13] | Message[13] 0x00 Perm_row_1[ 1] 1 Tag[13] +; N[14] AEAD_State[14] | Message[14] 0x00 Perm_row_1[ 2] 1 Tag[14] +; N[15] AEAD_State[15] | Message[15] 0x00 Perm_row_1[ 3] 1 Tag[15] +; K[ 0] AEAD_State[16] | Message[16] 0x00 Perm_row_1[ 4] 1 +; K[ 1] AEAD_State[17] | Message[17] 0x00 Perm_row_1[ 5] 1 +; K[ 2] AEAD_State[18] | Message[18] 0x00 Perm_row_1[ 6] 1 +; K[ 3] AEAD_State[19] | Message[19] 0x00 Perm_row_1[ 7] 1 +; K[ 4] AEAD_State[20] | Message[20] 0x00 Perm_row_1[ 8] 1 +; K[ 5] AEAD_State[21] | Message[21] 0x00 Perm_row_1[ 9] 1 +; K[ 6] AEAD_State[22] | Message[22] 0x00 Perm_row_1[10] 1 +; K[ 7] AEAD_State[23] | Message[23] 0x00 Perm_row_1[11] <<< 1 +; K[ 8] AEAD_State[24] | Perm_row_2[ 0] 8 +; K[ 9] AEAD_State[25] | Perm_row_2[ 1] 8 +; K[10] AEAD_State[26] | Perm_row_2[ 2] 8 +; K[11] AEAD_State[27] | Perm_row_2[ 3] 8 +; K[12] AEAD_State[28] | Perm_row_2[ 4] 8 +; K[13] AEAD_State[29] | Perm_row_2[ 5] 8 +; K[14] AEAD_State[30] | Perm_row_2[ 6] 8 +; K[15] AEAD_State[31] | Perm_row_2[ 7] 8 +; 0x00 AEAD_State[32] | Perm_row_2[ 8] 8 +; 0x00 AEAD_State[33] | Perm_row_2[ 9] 8 +; 0x00 AEAD_State[34] | Perm_row_2[10] 8 +; 0x00 AEAD_State[35] | Perm_row_2[11] <<< 8 +; 0x00 AEAD_State[36] | Perm_row_3[ 0] 55 +; 0x00 AEAD_State[37] | Perm_row_3[ 1] 55 +; 0x00 AEAD_State[38] | Perm_row_3[ 2] 55 +; 0x00 AEAD_State[39] | Perm_row_3[ 3] 55 +; 0x00 AEAD_State[40] | Perm_row_3[ 4] 55 +; 0x00 AEAD_State[41] | Perm_row_3[ 5] 55 +; 0x00 AEAD_State[42] | Perm_row_3[ 6] 55 +; 0x00 AEAD_State[43] | Perm_row_3[ 7] 55 +; 0x00 AEAD_State[44] | Perm_row_3[ 8] 55 +; 0x00 AEAD_State[45] | Perm_row_3[ 9] 
/* Assembly core (encrypt_core.S). The input length is an 8-bit value. */
extern void crypto_hash_asm(
	unsigned char *out,
	const unsigned char *in,
	unsigned char inlen
	);

/*
 * Hash in[0..inlen-1] into out[0..CRYPTO_BYTES-1].
 *
 * Returns 0 on success. The assembly core takes the length as an
 * unsigned char, so a longer input is rejected with -1 (out is left
 * untouched) instead of being silently reduced modulo 256, which would
 * return the digest of a different, shorter message.
 */
int crypto_hash(
	unsigned char *out,
	const unsigned char *in,
	unsigned long long inlen
)
{
	/* Largest length representable in the core's 8-bit length argument. */
	const unsigned long long max_len = (unsigned char)-1;

	if (inlen > max_len) {
		return -1;
	}

	crypto_hash_asm(out, in, (unsigned char)inlen);

	return 0;
}
; Permutation (256-bit state): apply rn rounds to the state in SRAM_STATE.
;
; In:  rn = number of rounds.
; The state is four rows of ROW_INBYTES bytes. Row 3 is cached in
; x30..x37 for the whole call; rows 0..2 are processed in SRAM, one byte
; (eight bit-sliced columns) per step. The steps are sequenced through
; load_columns_table with Z/ijmp, whose last entry is amend_shiftRow.
;
; The registers listed in PUSH_CONFLICT are preserved. tmp0, tmp1 (t2j),
; cnt0 (t1j) and x30..x37 (r0..r7) are clobbered.
Permutation:
    PUSH_CONFLICT
    mov rcnt, rn

    ldi rc, 0x01            ; initial round constant
    ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES)
    ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES)
    ld x30, Y+
    ld x31, Y+
    ld x32, Y+
    ld x33, Y+
    ld x34, Y+
    ld x35, Y+
    ld x36, Y+
    ld x37, Y+

round_loop_start:
    rjmp AddRC_SubColumns_Start

; One entry per byte column, followed by the end-of-round fix-up.
load_columns_table:
    rjmp load_column0
    rjmp load_column1
    rjmp load_column2
    rjmp load_column3
    rjmp load_column4
    rjmp load_column5
    rjmp load_column6
    rjmp load_column7
    rjmp amend_shiftRow

; load_columnN: write the row 3 byte produced by the previous step back to
; its register and fetch the row 3 byte of column N into x3j.
load_column0:
    mov x3j, x30
    rjmp Sbox_one_column
load_column1:
    mov x30, x3j
    mov x3j, x31
    rjmp Sbox_one_column
load_column2:
    mov x31, x3j
    mov x3j, x32
    rjmp Sbox_one_column
load_column3:
    mov x32, x3j
    mov x3j, x33
    rjmp Sbox_one_column
load_column4:
    mov x33, x3j
    mov x3j, x34
    rjmp Sbox_one_column
load_column5:
    mov x34, x3j
    mov x3j, x35
    rjmp Sbox_one_column
load_column6:
    mov x35, x3j
    mov x3j, x36
    rjmp Sbox_one_column
load_column7:
    mov x36, x3j
    mov x3j, x37
    rjmp Sbox_one_column

#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
; Both modes built: the round-constant LFSR is selected at run time.
LFSR_table:
    rjmp LFSR6
    rjmp LFSR7
LFSR6:
    LFSR6_MACRO
    rjmp LFSR_DONE
LFSR7:
    LFSR7_MACRO
    rjmp LFSR_DONE
#endif

;;;;;;;;;;;;;;;;;;;;;;;; Real Start
AddRC_SubColumns_Start:
    ldi YH, hi8(SRAM_STATE)
    ldi YL, lo8(SRAM_STATE)
    clr ccnt
    ld x0j, Y
    eor x0j, rc             ; add the round constant to byte 0 of row 0

#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
    ldi ZL, pm_lo8(LFSR_table)
    ldi ZH, pm_hi8(LFSR_table)
    sbrc AEDH, 2            ; AEDH[2] = 0 for AEAD (LFSR6), AEDH[2] = 1 for HASH (LFSR7)
    adiw ZL, 1
    ijmp
LFSR_DONE:
#elif defined(CRYPTO_AEAD)
    LFSR6_MACRO             ; only AEAD
#else
    LFSR7_MACRO             ; only HASH
#endif

    ldd x1j, Y + ROW_INBYTES
    ldd x2j, Y + 2 * ROW_INBYTES
    ldi ZL, pm_lo8(load_columns_table)
    ldi ZH, pm_hi8(load_columns_table)
    ijmp
Sbox_one_column:
    Sbox x0j, x1j, x2j, x3j

    ; The row rotations are folded into the store of each byte:
    ;   row 0: stored unchanged
    ;   row 1: rotated left by 1 bit, the carry links neighbouring bytes
    ;   row 2: rotated left by 8 bits, i.e. stored one byte further on
    ;   row 3: kept in registers and rotated in amend_shiftRow
    ; Store a byte to Row 0
    st Y, x0j
    ; Store a byte combined with ShiftRow1
    lsl t1j                 ; carry = bit 7 of the previous, unshifted row 1 byte
    mov t1j, x1j            ; back up the last updated byte in t1j, to be used in shiftRow1 (1 bit left)
    rol x1j
    std Y + ROW_INBYTES, x1j
    ; Store a byte combined with ShiftRow2
    inc ccnt
    cpi ccnt, ROW_INBYTES
    breq ROW2_WRAP
    ldd t2j, Y + 2 * ROW_INBYTES + 1 ; load the next byte first: the updated byte is stored at its address
    std Y + 2 * ROW_INBYTES + 1, x2j
    mov x2j, t2j
    rjmp NO_ROW2_WRAP       ; short relative jump, like every other jump in this file
ROW2_WRAP:
    std Y + ROW_INBYTES + 1, x2j ; last column wraps around to byte 0 of row 2
    ; ShiftRow3 remains to be done at amend_shiftRow
NO_ROW2_WRAP:
    adiw YL, 1
    ld x0j, Y
    ldd x1j, Y + ROW_INBYTES

    adiw ZL, 1              ; next entry of load_columns_table
    ijmp

amend_shiftRow:
    ldi YH, hi8(SRAM_STATE + ROW_INBYTES)
    ldi YL, lo8(SRAM_STATE + ROW_INBYTES)

    ; Byte 0 of row 1 was rotated before the top bit of the last byte was
    ; known; patch that bit into bit 0 now.
    ld x1j, Y
    bst t1j, 7
    bld x1j, 0
    st Y, x1j

    ; Row 3 <<< 25 is done as <<< 1 followed by <<< 24.
    ; <<< 1
    mov x37, x3j            ; write back the row 3 byte of the last column
    rol x3j                 ; carry = top bit of the row, it wraps into x30
    rol x30
    rol x31
    rol x32
    rol x33
    rol x34
    rol x35
    rol x36
    rol x37
    ; <<< 24
    ; 7 6 5 4 3 2 1 0 => 4 3 2 1 0 7 6 5
    mov x3j, x30
    mov x30, x35
    mov x35, x32
    mov x32, x37
    mov x37, x34
    mov x34, x31
    mov x31, x36
    mov x36, x33
    mov x33, x3j

    dec rcnt
    breq round_loop_end
    rjmp round_loop_start

round_loop_end:
    ; Write the cached row 3 back to the state.
    ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES)
    ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES)
    st Y+, x30
    st Y+, x31
    st Y+, x32
    st Y+, x33
    st Y+, x34
    st Y+, x35
    st Y+, x36
    st Y+, x37

    POP_CONFLICT
ret
; Permutation (384-bit state): apply rn rounds to the state in SRAM_STATE.
;
; In:  rn = number of rounds.
; The state is four rows of ROW_INBYTES bytes. Row 3 is cached in
; x30..x3b for the whole call; rows 0..2 are processed in SRAM, one byte
; (eight bit-sliced columns) per step. The steps are sequenced through
; load_columns_table with Z/ijmp, whose last entry is amend_shiftRow.
;
; The registers listed in PUSH_CONFLICT are preserved. tmp0, tmp1 (t2j),
; cnt0 (t1j) and x30..x3b (r0..r11) are clobbered.
Permutation:
    PUSH_CONFLICT
    mov rcnt, rn

    ldi rc, 0x01            ; initial round constant
    ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES)
    ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES)
    ld x30, Y+
    ld x31, Y+
    ld x32, Y+
    ld x33, Y+
    ld x34, Y+
    ld x35, Y+
    ld x36, Y+
    ld x37, Y+
    ld x38, Y+
    ld x39, Y+
    ld x3a, Y+
    ld x3b, Y+

round_loop_start:
    rjmp AddRC_SubColumns_Start

; One entry per byte column, followed by the end-of-round fix-up.
load_columns_table:
    rjmp load_column0
    rjmp load_column1
    rjmp load_column2
    rjmp load_column3
    rjmp load_column4
    rjmp load_column5
    rjmp load_column6
    rjmp load_column7
    rjmp load_column8
    rjmp load_column9
    rjmp load_columna
    rjmp load_columnb
    rjmp amend_shiftRow

; load_columnN: write the row 3 byte produced by the previous step back to
; its register and fetch the row 3 byte of column N into x3j.
load_column0:
    mov x3j, x30
    rjmp Sbox_one_column
load_column1:
    mov x30, x3j
    mov x3j, x31
    rjmp Sbox_one_column
load_column2:
    mov x31, x3j
    mov x3j, x32
    rjmp Sbox_one_column
load_column3:
    mov x32, x3j
    mov x3j, x33
    rjmp Sbox_one_column
load_column4:
    mov x33, x3j
    mov x3j, x34
    rjmp Sbox_one_column
load_column5:
    mov x34, x3j
    mov x3j, x35
    rjmp Sbox_one_column
load_column6:
    mov x35, x3j
    mov x3j, x36
    rjmp Sbox_one_column
load_column7:
    mov x36, x3j
    mov x3j, x37
    rjmp Sbox_one_column
load_column8:
    mov x37, x3j
    mov x3j, x38
    rjmp Sbox_one_column
load_column9:
    mov x38, x3j
    mov x3j, x39
    rjmp Sbox_one_column
load_columna:
    mov x39, x3j
    mov x3j, x3a
    rjmp Sbox_one_column
load_columnb:
    mov x3a, x3j
    mov x3j, x3b
    rjmp Sbox_one_column

;;;;;;;;;;;;;;;;;;;;;;;; Real Start
AddRC_SubColumns_Start:
    ldi YH, hi8(SRAM_STATE)
    ldi YL, lo8(SRAM_STATE)
    ldi ZL, pm_lo8(load_columns_table)
    ldi ZH, pm_hi8(load_columns_table)
    clr ccnt
    ld x0j, Y
    eor x0j, rc             ; add the round constant to byte 0 of row 0
    LFSR7_MACRO             ; step the round constant for the next round

    ldd x1j, Y + ROW_INBYTES
    ldd x2j, Y + 2 * ROW_INBYTES
    ijmp
Sbox_one_column:
    Sbox x0j, x1j, x2j, x3j

    ; The row rotations are folded into the store of each byte:
    ;   row 0: stored unchanged
    ;   row 1: rotated left by 1 bit, the carry links neighbouring bytes
    ;   row 2: rotated left by 8 bits, i.e. stored one byte further on
    ;   row 3: kept in registers and rotated in amend_shiftRow
    ; Store a byte to Row 0
    st Y, x0j
    ; Store a byte combined with ShiftRow 1
    lsl t1j                 ; carry = bit 7 of the previous, unshifted row 1 byte
    mov t1j, x1j            ; back up the last updated byte in t1j, to be used in shiftRow1 (1 bit left)
    rol x1j
    std Y + ROW_INBYTES, x1j
    ; Store a byte combined with ShiftRow 2
    inc ccnt
    cpi ccnt, ROW_INBYTES
    breq ROW2_WRAP
    ldd t2j, Y + 2 * ROW_INBYTES + 1 ; load the next byte first: the updated byte is stored at its address
    std Y + 2 * ROW_INBYTES + 1, x2j
    mov x2j, t2j
    rjmp NO_ROW2_WRAP       ; short relative jump, like every other jump in this file
ROW2_WRAP:
    std Y + ROW_INBYTES + 1, x2j ; last column wraps around to byte 0 of row 2
    ; ShiftRow3 remains to be done at amend_shiftRow
NO_ROW2_WRAP:
    adiw YL, 1
    ld x0j, Y
    ldd x1j, Y + ROW_INBYTES

    adiw ZL, 1              ; next entry of load_columns_table
    ijmp

amend_shiftRow:
    ldi YH, hi8(SRAM_STATE + ROW_INBYTES)
    ldi YL, lo8(SRAM_STATE + ROW_INBYTES)

    ; Byte 0 of row 1 was rotated before the top bit of the last byte was
    ; known; patch that bit into bit 0 now.
    ld x1j, Y
    bst t1j, 7
    bld x1j, 0
    st Y, x1j

    ; Row 3 <<< 55 is done as >>> 1 followed by <<< 56.
    ; >>> 1
    mov x3b, x3j            ; write back the row 3 byte of the last column
    ror x3j                 ; carry = bit 0 of the top byte, it enters x3a
    ror x3a
    ror x39
    ror x38
    ror x37
    ror x36
    ror x35
    ror x34
    ror x33
    ror x32
    ror x31
    ror x30
    ror x3b                 ; bit 0 of the row wraps into the top bit
    ; <<< 56
    ; b a 9 8 7 6 5 4 3 2 1 0 => 4 3 2 1 0 b a 9 8 7 6 5
    mov x3j, x30
    mov x30, x35
    mov x35, x3a
    mov x3a, x33
    mov x33, x38
    mov x38, x31
    mov x31, x36
    mov x36, x3b
    mov x3b, x34
    mov x34, x39
    mov x39, x32
    mov x32, x37
    mov x37, x3j

    dec rcnt
    breq round_loop_end
    rjmp round_loop_start

round_loop_end:
    ; Write the cached row 3 back to the state.
    ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES)
    ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES)
    st Y+, x30
    st Y+, x31
    st Y+, x32
    st Y+, x33
    st Y+, x34
    st Y+, x35
    st Y+, x36
    st Y+, x37
    st Y+, x38
    st Y+, x39
    st Y+, x3a
    st Y+, x3b

    POP_CONFLICT
ret
+; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.0 2020 by KNOT Team * +; ********************************************** +; +#include "assist.h" + +Permutation: + PUSH_CONFLICT + mov rcnt, rn + + ldi rc, 0x01 + ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES) + ld x30, Y+ + ld x31, Y+ + ld x32, Y+ + ld x33, Y+ + ld x34, Y+ + ld x35, Y+ + ld x36, Y+ + ld x37, Y+ + ld x38, Y+ + ld x39, Y+ + ld x3a, Y+ + ld x3b, Y+ + ld x3c, Y+ + ld x3d, Y+ + ld x3e, Y+ + ld x3f, Y+ + +round_loop_start: + rjmp AddRC_SubColumns_Start + +load_columns_table: + rjmp load_column0 + rjmp load_column1 + rjmp load_column2 + rjmp load_column3 + rjmp load_column4 + rjmp load_column5 + rjmp load_column6 + rjmp load_column7 + rjmp load_column8 + rjmp load_column9 + rjmp load_columna + rjmp load_columnb + rjmp load_columnc + rjmp load_columnd + rjmp load_columne + rjmp load_columnf + rjmp amend_shiftRow + +load_column0: + mov x3j, x30 + rjmp Sbox_one_column +load_column1: + mov x30, x3j + mov x3j, x31 + rjmp Sbox_one_column +load_column2: + mov x31, x3j + mov x3j, x32 + rjmp Sbox_one_column +load_column3: + mov x32, x3j + mov x3j, x33 + rjmp Sbox_one_column +load_column4: + mov x33, x3j + mov x3j, x34 + rjmp Sbox_one_column +load_column5: + mov x34, x3j + mov x3j, x35 + rjmp Sbox_one_column +load_column6: + mov x35, x3j + mov x3j, x36 + rjmp Sbox_one_column +load_column7: + mov x36, x3j + mov x3j, x37 + rjmp Sbox_one_column +load_column8: + mov x37, x3j + mov x3j, x38 + rjmp Sbox_one_column +load_column9: + mov x38, x3j + mov x3j, x39 + rjmp Sbox_one_column +load_columna: + mov x39, x3j + mov x3j, x3a + rjmp Sbox_one_column +load_columnb: + mov x3a, x3j + mov x3j, x3b + rjmp Sbox_one_column +load_columnc: + mov x3b, x3j + mov x3j, x3c + rjmp Sbox_one_column +load_columnd: + mov x3c, x3j + mov x3j, x3d + rjmp Sbox_one_column +load_columne: + mov x3d, x3j + mov x3j, x3e + rjmp Sbox_one_column +load_columnf: + mov x3e, x3j + mov x3j, x3f + 
rjmp Sbox_one_column + +#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) +LFSR_table: + rjmp LFSR7 + rjmp LFSR8 +LFSR7: + LFSR7_MACRO + rjmp LFSR_DONE +LFSR8: + LFSR8_MACRO + rjmp LFSR_DONE +#endif + +;;;;;;;;;;;;;;;;;;;;;;;; Real Start +AddRC_SubColumns_Start: + ldi YH, hi8(SRAM_STATE) + ldi YL, lo8(SRAM_STATE) + clr ccnt + ld x0j, Y + eor x0j, rc + +#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) + ldi ZL, pm_lo8(LFSR_table) + ldi ZH, pm_hi8(LFSR_table) + sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD (adiw skipped, LFSR7) and AEDH[2] = 1 for HASH (LFSR8) + adiw ZL, 1 + ijmp +LFSR_DONE: +#elif defined(CRYPTO_AEAD) + LFSR7_MACRO ; only AEAD +#else + LFSR8_MACRO ; only HASH +#endif + + ldd x1j, Y + ROW_INBYTES + ldd x2j, Y + 2 * ROW_INBYTES + ldd t2j, Y + 2 * ROW_INBYTES + 1 ; second byte of Row2, loaded after the LFSR step because t2j and tmp1 are the same register + ldi ZL, pm_lo8(load_columns_table) + ldi ZH, pm_hi8(load_columns_table) + ijmp +Sbox_one_column: + Sbox x0j, x1j, x2j, x3j + + ; f e d c b a 9 8 7 6 5 4 3 2 1 0 + ; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x- 0 + ; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x' 0 + ; -- -- -- -- -- -- -- -- -- -- -- -- -- x- -- -- 2 + ; -- -- -- -- -- -- -- -- -- -- -- -- x' -- -- -- 3 + ; c b a 9 8 7 6 5 4 3 2 1 0 f e d + ; Store a byte to Row 0 + st Y, x0j + ; Store a byte combined with ShiftRow1 + lsl t1j ; carry = bit 7 of the Row1 byte saved for the previous column (stale for column 0, patched in amend_shiftRow) + mov t1j, x1j ; back up the last updated byte in t1j, to be used in shiftRow1 (1 bit left) + rol x1j ; rotate left by one, taking the carry produced by lsl as the new bit 0 + std Y + ROW_INBYTES, x1j + ; Store a byte combined with ShiftRow2 + inc ccnt + cpi ccnt, ROW_INBYTES - 1 + brsh ROW2_WRAP + ldd tmp0, Y + 2 * ROW_INBYTES + 2 ; save the not yet processed Row2 byte before the shifted result overwrites it two bytes ahead + std Y + 2 * ROW_INBYTES + 2, x2j + mov x2j, t2j + mov t2j, tmp0 + jmp NO_ROW2_WRAP +ROW2_WRAP: + std Y + ROW_INBYTES + 2, x2j + mov x2j, t2j + + ; ShiftRow3 remains to be done in 'amend_shiftRow' +NO_ROW2_WRAP: + adiw YL, 1 + ld x0j, Y + ldd x1j, Y + ROW_INBYTES + + adiw ZL, 1 + ijmp + +amend_shiftRow: + ldi YH, hi8(SRAM_STATE + ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + ROW_INBYTES) + + ld
x1j, Y + bst t1j, 7 + bld x1j, 0 + st Y, x1j + + ; <<< 1 + mov x3f, x3j + rol x3j + rol x30 + rol x31 + rol x32 + rol x33 + rol x34 + rol x35 + rol x36 + rol x37 + rol x38 + rol x39 + rol x3a + rol x3b + rol x3c + rol x3d + rol x3e + rol x3f + ; <<< 24 + ; f e d c b a 9 8 7 6 5 4 3 2 1 0 => + ; c b a 9 8 7 6 5 4 3 2 1 0 f e d + mov x3j, x30 + mov x30, x3d + mov x3d, x3a + mov x3a, x37 + mov x37, x34 + mov x34, x31 + mov x31, x3e + mov x3e, x3b + mov x3b, x38 + mov x38, x35 + mov x35, x32 + mov x32, x3f + mov x3f, x3c + mov x3c, x39 + mov x39, x36 + mov x36, x33 + mov x33, x3j + + dec rcnt + breq round_loop_end + rjmp round_loop_start + +round_loop_end: + + ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES) + st Y+, x30 + st Y+, x31 + st Y+, x32 + st Y+, x33 + st Y+, x34 + st Y+, x35 + st Y+, x36 + st Y+, x37 + st Y+, x38 + st Y+, x39 + st Y+, x3a + st Y+, x3b + st Y+, x3c + st Y+, x3d + st Y+, x3e + st Y+, x3f + + POP_CONFLICT +ret \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot256v1/avr8_lowrom/permutation.h b/knot/Implementations/crypto_hash/knot256v1/avr8_lowrom/permutation.h new file mode 100644 index 0000000..a57c5d3 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v1/avr8_lowrom/permutation.h @@ -0,0 +1,109 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.0 2020 by KNOT Team * +; ********************************************** +; + +; +; ============================================ +; R E G I S T E R D E F I N I T I O N S +; ============================================ +; + +#define mclen r16 +#define radlen r17 +#define tcnt r17 +#define tmp0 r20 +#define tmp1 r21 +#define cnt0 r22 +#define rn r23 +#define rate r24 + +; +; ; AEDH = 0b000: for authenticate AD +; ; AEDH = 0b001: for encryption 
+; ; AEDH = 0b011: for decryption +; ; AEDH = 0b100: for hash +; #define AEDH r25 ; Register used globally within this program +; +; #define x30 r0 ; Register used without overlapping +; #define x31 r1 ; Register used without overlapping +; #define x32 r2 ; Register used without overlapping +; #define x33 r3 ; Register used without overlapping +; #define x34 r4 ; Register used without overlapping +; #define x35 r5 ; Register used without overlapping +; #define x36 r6 ; Register used without overlapping +; #define x37 r7 ; Register used without overlapping +; #define x38 r8 ; Register used without overlapping +; #define x39 r9 ; Register used without overlapping +; #define x3a r10 ; Register used without overlapping +; #define x3b r11 ; Register used without overlapping +; #define x3c r12 ; Register used without overlapping +; #define x3d r13 ; Register used without overlapping +; #define x3e r14 ; Register used without overlapping +; #define x3f r15 ; Register used without overlapping +; +; #define x0j r16 ; Register used overlapped, should be backed up before using +; #define x1j r17 ; Register used overlapped, should be backed up before using +; #define x2j r18 ; Register used overlapped, should be backed up before using +; #define x3j r19 ; Register used overlapped, should be backed up before using +; +; ; t2j used in knot512 to keep one byte in Row2 (because of rotating 16-bit), +; ; will not be interupt with LFSR which uses the overlapped register tmp1 +; #define t2j r21 ; Temporary register, used freely +; #define t1j r22 ; Temporary register, used freely +; #define t3j r23 ; Temporary register, used freely +; +; #define rc r24 ; Register used overlapped, should be backed up before using +; #define rcnt r26 ; Register used overlapped, should be backed up before using +; #define ccnt r27 ; Register used overlapped, should be backed up before using + +#define AEDH r25 +#define x30 r0 +#define x31 r1 +#define x32 r2 +#define x33 r3 +#define x34 r4 +#define x35 
r5 +#define x36 r6 +#define x37 r7 +#define x38 r8 +#define x39 r9 +#define x3a r10 +#define x3b r11 +#define x3c r12 +#define x3d r13 +#define x3e r14 +#define x3f r15 + +#define x0j r16 +#define x1j r17 +#define x2j r18 +#define x3j r19 + +; t2j is used in knot512 to keep one byte of Row2 (because of the 16-bit rotation); +; it is not disturbed by the LFSR, which writes the overlapping register tmp1 (also r21) +#define t2j r21 +#define t1j r22 +#define t3j r23 + +#define rc r24 +#define rcnt r26 +#define ccnt r27 + +#if (STATE_INBITS==256) +#include "knot256.h" +#elif (STATE_INBITS==384) +#include "knot384.h" +#elif (STATE_INBITS==512) +#include "knot512.h" +#else +#error "Not specified key size and state size" +#endif + + diff --git a/knot/Implementations/crypto_hash/knot256v2/avr8_lowrom/api.h b/knot/Implementations/crypto_hash/knot256v2/avr8_lowrom/api.h new file mode 100644 index 0000000..cb530c7 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v2/avr8_lowrom/api.h @@ -0,0 +1 @@ +#define CRYPTO_BYTES 32 \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot256v2/avr8_lowrom/assist.h b/knot/Implementations/crypto_hash/knot256v2/avr8_lowrom/assist.h new file mode 100644 index 0000000..cb903a5 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v2/avr8_lowrom/assist.h @@ -0,0 +1,140 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.0 2020 by KNOT Team * +; ********************************************** +; +.macro LFSR6_MACRO + bst rc, 5 ; T = bit 5 of the round constant + bld tmp0, 0 + bst rc, 4 ; T = bit 4 of the round constant + bld tmp1, 0 + eor tmp0, tmp1 ; bit 0 of tmp0 = feedback (bit 5 xor bit 4) + ror tmp0 ; move the feedback bit into the carry + rol rc ; shift left, feedback enters at bit 0 + andi rc, 0x3F ; keep the constant 6 bits wide +.endm + +.macro LFSR7_MACRO + bst rc, 6 ; T = bit 6 of the round constant + bld tmp0, 0 + bst rc, 5 ; T = bit 5 of the round constant + bld tmp1, 0 + eor tmp0, tmp1 ; bit 0 of tmp0 = feedback (bit 6 xor bit 5) + ror tmp0 ; move the feedback bit into the carry + rol rc ; shift left, feedback enters at bit 0 + andi rc, 0x7F ; keep the constant 7 bits wide +.endm + +.macro LFSR8_MACRO + bst rc, 7 + bld tmp0, 0 + bst rc, 5 + bld tmp1, 0 + eor tmp0,
tmp1 + bst rc, 4 + bld tmp1, 0 + eor tmp0, tmp1 + bst rc, 3 + bld tmp1, 0 + eor tmp0, tmp1 + ror tmp0 + rol rc +.endm + +.macro Sbox i0, i1, i2, i3 + mov tmp0, \i1 + com \i0 + and \i1, \i0 + eor \i1, \i2 + or \i2, tmp0 + eor \i0, \i3 + eor \i2, \i0 + eor tmp0, \i3 + and \i0, \i1 + eor \i3, \i1 + eor \i0, tmp0 + and tmp0, \i2 + eor \i1, tmp0 +.endm + +.macro PUSH_CONFLICT + push r16 + push r17 + push r18 + push r19 + + push r23 + push r24 + + push r26 + push r27 + push r28 + push r29 + push r30 + push r31 +.endm + +.macro POP_CONFLICT + pop r31 + pop r30 + pop r29 + pop r28 + pop r27 + pop r26 + + pop r24 + pop r23 + + pop r19 + pop r18 + pop r17 + pop r16 +.endm + +.macro PUSH_ALL + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r28 + push r29 +.endm + +.macro POP_ALL + pop r29 + pop r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + clr r1 +.endm \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot256v2/avr8_lowrom/config.h b/knot/Implementations/crypto_hash/knot256v2/avr8_lowrom/config.h new file mode 100644 index 0000000..f6fb82b --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v2/avr8_lowrom/config.h @@ -0,0 +1,131 @@ +#ifndef __CONFIG_H__ +#define __CONFIG_H__ + +//#define CRYPTO_AEAD +#define CRYPTO_HASH + +#define MAX_MESSAGE_LENGTH 128 + +#define STATE_INBITS 384 +/* For CRYPTO_AEAD */ +#define CRYPTO_KEYBITS 128 +/* For CRYPTO_HASH */ +#define CRYPTO_BITS 256 + +#define STATE_INBYTES ((STATE_INBITS + 7) / 8) +#define ROW_INBITS ((STATE_INBITS + 3) / 4) +#define ROW_INBYTES ((ROW_INBITS + 7) / 8) + +/* For CRYPTO_AEAD */ +#define CRYPTO_KEYBYTES ((CRYPTO_KEYBITS + 7) / 8) +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES CRYPTO_KEYBYTES +#define CRYPTO_ABYTES CRYPTO_KEYBYTES +#define 
CRYPTO_NOOVERLAP 1 + +#define MAX_ASSOCIATED_DATA_LENGTH 32 +#define MAX_CIPHER_LENGTH (MAX_MESSAGE_LENGTH + CRYPTO_ABYTES) + +#define TAG_MATCH 0 +#define TAG_UNMATCH -1 +#define OTHER_FAILURES -2 + +/* For CRYPTO_HASH */ +#define CRYPTO_BYTES ((CRYPTO_BITS + 7) / 8) + + + +#define DOMAIN_BITS 0x80 +#define PAD_BITS 0x01 +#define S384_R192_BITS 0x80 + +#if (STATE_INBITS==256) +#define C1 1 +#define C2 8 +#define C3 25 +#elif (STATE_INBITS==384) +#define C1 1 +#define C2 8 +#define C3 55 +#elif (STATE_INBITS==512) +#define C1 1 +#define C2 16 +#define C3 25 +#else +#error "Not specified state size" +#endif + +#ifdef CRYPTO_AEAD +/* For CRYPTO_AEAD */ +#define KEY_INBITS (CRYPTO_KEYBYTES * 8) +#define KEY_INBYTES (CRYPTO_KEYBYTES) + +#define NONCE_INBITS (CRYPTO_NPUBBYTES * 8) +#define NONCE_INBYTES (CRYPTO_NPUBBYTES) + +#define TAG_INBITS (CRYPTO_ABYTES * 8) +#define TAG_INBYTES (CRYPTO_ABYTES) + +#if (KEY_INBITS==128) && (STATE_INBITS==256) +#define RATE_INBITS 64 +#define NR_0 52 +#define NR_i 28 +#define NR_f 32 +#elif (KEY_INBITS==128) && (STATE_INBITS==384) +#define RATE_INBITS 192 +#define NR_0 76 +#define NR_i 28 +#define NR_f 32 +#elif (KEY_INBITS==192) && (STATE_INBITS==384) +#define RATE_INBITS 96 +#define NR_0 76 +#define NR_i 40 +#define NR_f 44 +#elif (KEY_INBITS==256) && (STATE_INBITS==512) +#define RATE_INBITS 128 +#define NR_0 100 +#define NR_i 52 +#define NR_f 56 +#else +#error "Not specified key size and state size" +#endif + +#define RATE_INBYTES ((RATE_INBITS + 7) / 8) +#define SQUEEZE_RATE_INBYTES TAG_INBYTES + +#endif + +#ifdef CRYPTO_HASH +/* For CRYPTO_HASH */ +#define HASH_DIGEST_INBITS (CRYPTO_BYTES * 8) + +#if (HASH_DIGEST_INBITS==256) && (STATE_INBITS==256) +#define HASH_RATE_INBITS 32 +#define HASH_SQUEEZE_RATE_INBITS 128 +#define NR_h 68 +#elif (HASH_DIGEST_INBITS==256) && (STATE_INBITS==384) +#define HASH_RATE_INBITS 128 +#define HASH_SQUEEZE_RATE_INBITS 128 +#define NR_h 80 +#elif (HASH_DIGEST_INBITS==384) && (STATE_INBITS==384) 
+#define HASH_RATE_INBITS 48 +#define HASH_SQUEEZE_RATE_INBITS 192 +#define NR_h 104 +#elif (HASH_DIGEST_INBITS==512) && (STATE_INBITS==512) +#define HASH_RATE_INBITS 64 +#define HASH_SQUEEZE_RATE_INBITS 256 +#define NR_h 140 +#else +#error "Not specified hash digest size and state size" +#endif + +#define HASH_RATE_INBYTES ((HASH_RATE_INBITS + 7) / 8) +#define HASH_SQUEEZE_RATE_INBYTES ((HASH_SQUEEZE_RATE_INBITS + 7) / 8) + +#endif + +#define TAG_MATCH 0 +#define TAG_UNMATCH -1 +#define OTHER_FAILURES -2 + +#endif \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot256v2/avr8_lowrom/crypto_hash.h b/knot/Implementations/crypto_hash/knot256v2/avr8_lowrom/crypto_hash.h new file mode 100644 index 0000000..342a639 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v2/avr8_lowrom/crypto_hash.h @@ -0,0 +1,13 @@ +#ifdef __cplusplus +extern "C" { +#endif + +int crypto_hash( + unsigned char *out, + const unsigned char *in, + unsigned long long inlen + ); + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot256v2/avr8_lowrom/encrypt.c b/knot/Implementations/crypto_hash/knot256v2/avr8_lowrom/encrypt.c new file mode 100644 index 0000000..baf0a3b --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v2/avr8_lowrom/encrypt.c @@ -0,0 +1,106 @@ +#include +#include +#include +#include +#include "config.h" + +extern void crypto_aead_encrypt_asm( + unsigned char *c, + const unsigned char *m, + unsigned char mlen, + const unsigned char *ad, + unsigned char adlen, + const unsigned char *npub, + const unsigned char *k + ); + +extern int crypto_aead_decrypt_asm( + unsigned char *m, + const unsigned char *c, + unsigned char clen, + const unsigned char *ad, + unsigned char adlen, + const unsigned char *npub, + const unsigned char *k + ); + +extern void crypto_hash_asm( + unsigned char *out, + const unsigned char *in, + unsigned char inlen + ); + + +int crypto_aead_encrypt( + unsigned char *c, 
unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k + ) +{ + /* + ... + ... the code for the cipher implementation goes here, + ... generating a ciphertext c[0],c[1],...,c[*clen-1] + ... from a plaintext m[0],m[1],...,m[mlen-1] + ... and associated data ad[0],ad[1],...,ad[adlen-1] + ... and nonce npub[0],npub[1],.. + ... and secret key k[0],k[1],... + ... the implementation shall not use nsec + ... + ... return 0; + */ + /* Returns 0 on success; returns -1 without touching c or *clen when mlen or adlen does not fit the 8-bit length arguments of the assembly core. */ + (void)nsec; + if (mlen > 0xFF || adlen > 0xFF) { return -1; } /* reject instead of silently truncating the length */ + crypto_aead_encrypt_asm(c, m, (unsigned char)mlen, ad, (unsigned char)adlen, npub, k); + + *clen = mlen + TAG_INBYTES; + return 0; +} + + + +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k + ) +{ + /* + ... + ... the code for the AEAD implementation goes here, + ... generating a plaintext m[0],m[1],...,m[*mlen-1] + ... and secret message number nsec[0],nsec[1],... + ... from a ciphertext c[0],c[1],...,c[clen-1] + ... and associated data ad[0],ad[1],...,ad[adlen-1] + ... and nonce number npub[0],npub[1],... + ... and secret key k[0],k[1],... + ... + ...
return 0; + */ + unsigned long long mlen_; + int tag_is_match; /* keeps the full int status returned by the assembly core */ + /* Returns 0 and sets *mlen on success; -1 on an unusable length or a tag mismatch (m is zeroed on mismatch). */ + (void)nsec; + if (clen < CRYPTO_ABYTES || clen - CRYPTO_ABYTES > 0xFF || adlen > 0xFF) { /* tag missing, or a length that does not fit the 8-bit arguments of the core */ + return -1; + } + mlen_ = clen - CRYPTO_ABYTES; + + tag_is_match = crypto_aead_decrypt_asm(m, c, (unsigned char)mlen_, ad, (unsigned char)adlen, npub, k); + + if (tag_is_match != 0) + { + memset(m, 0, (size_t)mlen_); + return -1; + } + + *mlen = mlen_; + return 0; +} \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot256v2/avr8_lowrom/encrypt_core.S b/knot/Implementations/crypto_hash/knot256v2/avr8_lowrom/encrypt_core.S new file mode 100644 index 0000000..cb7aed5 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v2/avr8_lowrom/encrypt_core.S @@ -0,0 +1,537 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.0 2020 by KNOT Team * +; ********************************************** +; + +; +; ============================================ +; S R A M D E F I N I T I O N S +; ============================================ +; +#include +#include "config.h" + +.section .noinit + SRAM_STATE: .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#if (STATE_INBYTES > 32) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif +#if (STATE_INBYTES > 48) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif + SRAM_MESSAGE_OUT_ADDR: .BYTE 0, 0 + SRAM_MESSAGE_IN_ADDR: .BYTE 0, 0 + SRAM_MESSAGE_IN_LEN: .BYTE 0, 0 +#ifdef CRYPTO_AEAD +; For CRYPTO_AEAD + SRAM_ASSOCIATED_DATA_ADDR: .BYTE 0, 0 + SRAM_ADLEN: .BYTE 0, 0 + SRAM_NONCE_ADDR: .BYTE 0, 0 + SRAM_KEY_ADDR: .BYTE 0, 0 + + SRAM_ADDITIONAL: + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#if (CRYPTO_ABYTES > 16) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif +#if (CRYPTO_ABYTES > 24) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0
+#endif + +#endif + +.section .text + +#include "permutation.h" + +; require YH:YL be the address of the current associated data/cipher/message block +; for enc and dec, store ciphertext or plaintext +; require ZH:ZL be the address of the current cipher/message block +XOR_to_State: + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + mov cnt0, rate +XOR_to_State_loop: + ld tmp0, Y+ ; plaintext/ciphertext + ld tmp1, X ; state + eor tmp1, tmp0 ; ciphertext/plaintext + sbrc AEDH, 0 ; test auth or enc/dec, if AEDH[0] == 0, skip storing the result + st Z+, tmp1 ; store ciphertext/plaintext + sbrc AEDH, 1 ; test auth/enc or dec, if AEDH[1] == 0, skip replacing the state byte + mov tmp1, tmp0 ; if dec, replace state + st X+, tmp1 ; store state byte + dec cnt0 + brne XOR_to_State_loop +; YH:YL are now the address of the next associated data block +ret + +; require YH:YL to point to the input data +; require ZH:ZL to point to the output data +; require cnt0 to contain the number of bytes in the source data +; require the number of bytes in the source data to be less than rate, i.e., 0 <= cnt0 < rate +; +; the 0th bit in AEDH is used to distinguish (auth AD) or (enc/dec M/C): +; AEDH[0] = 0 for (auth AD), AEDH[0] = 1 for (enc/dec M/C) +; the 1st bit in AEDH is used to distinguish (auth AD/enc M) or (dec C): +; AEDH[1] = 0 for (auth AD/enc M), AEDH[1] = 1 for (dec C) +; AEDH = 0b000 for (auth AD) +; AEDH = 0b001 for (enc M) +; AEDH = 0b011 for (dec C) +Pad_XOR_to_State: + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + tst cnt0 + breq XOR_padded_data +XOR_source_data_loop: + ld tmp0, Y+ ; plaintext/ciphertext + ld tmp1, X ; state + eor tmp1, tmp0 ; ciphertext/plaintext + sbrc AEDH, 0 ; test auth or enc/dec, if AEDH[0] == 0, skip storing the result + st Z+, tmp1 ; store ciphertext/plaintext + sbrc AEDH, 1 ; test auth/enc or dec, if AEDH[1] == 0, skip replacing the state byte + mov tmp1, tmp0 ; if dec, replace state + st X+, tmp1 ; store state byte + dec cnt0 + brne XOR_source_data_loop +XOR_padded_data: + ldi tmp0,
PAD_BITS + ld tmp1, X + eor tmp1, tmp0 + st X, tmp1 +ret + +AddDomain: + ldi XH, hi8(SRAM_STATE + STATE_INBYTES - 1) + ldi XL, lo8(SRAM_STATE + STATE_INBYTES - 1) + ldi tmp0, DOMAIN_BITS + ld tmp1, X + eor tmp0, tmp1 + st X, tmp0 +ret + +; require ZH:ZL be the address of the destination +EXTRACT_from_State: + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + mov tmp1, rate +EXTRACT_from_State_loop: + ld tmp0, X+ + st Z+, tmp0 + dec tmp1 + brne EXTRACT_from_State_loop +ret + +AUTH: + tst radlen + breq AUTH_end + + cp radlen, rate + brlo auth_ad_padded_block + +auth_ad_loop: + rcall XOR_to_State + rcall Permutation + sub radlen, rate + cp radlen, rate + brlo auth_ad_padded_block + rjmp auth_ad_loop + +auth_ad_padded_block: + mov cnt0, radlen + rcall Pad_XOR_to_State + rcall Permutation + +AUTH_end: +ret + +#ifdef CRYPTO_AEAD +Initialization: + ldi rn, NR_0 + ldi XL, lo8(SRAM_STATE) + ldi XH, hi8(SRAM_STATE) + + lds YH, SRAM_NONCE_ADDR + lds YL, SRAM_NONCE_ADDR + 1 + ldi cnt0, CRYPTO_NPUBBYTES +load_nonce_loop: + ld tmp0, Y+ + st X+, tmp0 + dec cnt0 + brne load_nonce_loop + + lds YH, SRAM_KEY_ADDR + lds YL, SRAM_KEY_ADDR + 1 + ldi cnt0, CRYPTO_KEYBYTES +load_key_loop: + ld tmp0, Y+ + st X+, tmp0 + dec cnt0 + brne load_key_loop + +#if (STATE_INBITS==384) && (RATE_INBITS==192) + ldi cnt0, (STATE_INBYTES - CRYPTO_NPUBBYTES - CRYPTO_KEYBYTES - 1) + clr tmp0 +empty_state_loop: + st X+, tmp0 + dec cnt0 + brne empty_state_loop + ldi tmp0, S384_R192_BITS + st X+, tmp0 +#endif + + rcall Permutation +ret + +ENC: + tst mclen + breq ENC_end + + cp mclen, rate + brlo enc_padded_block + +enc_loop: + rcall XOR_to_State + ldi rn, NR_i + rcall Permutation + sub mclen, rate + cp mclen, rate + brlo enc_padded_block + rjmp enc_loop + +enc_padded_block: + mov cnt0, mclen + rcall Pad_XOR_to_State +ENC_end: +ret + +Finalization: + ldi rate, SQUEEZE_RATE_INBYTES + ldi rn, NR_f + rcall Permutation + rcall EXTRACT_from_State +ret + +; void crypto_aead_encrypt_asm( +; unsigned char *c, +; 
const unsigned char *m, +; unsigned char mlen, +; const unsigned char *ad, +; unsigned char adlen, +; const unsigned char *npub, +; const unsigned char *k +; ) +; +; unsigned char *c, is passed in r24:r25 +; const unsigned char *m, is passed in r22:r23 +; unsigned char mlen, is passed in r20 (declared unsigned char in encrypt.c; only r20 is used) +; const unsigned char *ad, is passed in r18:r19 +; unsigned char adlen, is passed in r16 (declared unsigned char in encrypt.c; only r16 is used) +; const unsigned char *npub, is passed in r14:r15 +; const unsigned char *k is passed in r12:r13 +.global crypto_aead_encrypt_asm +crypto_aead_encrypt_asm: + PUSH_ALL + ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR) + ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR) + st X+, r25 ;store cipher address in SRAM_MESSAGE_OUT_ADDR + st X+, r24 + st X+, r23 ;store message address in SRAM_MESSAGE_IN_ADDR + st X+, r22 + st X+, r21 ;store message length in SRAM_MESSAGE_IN_LEN + st X+, r20 + st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR + st X+, r18 + st X+, r17 ;store associated data length in SRAM_ADLEN + st X+, r16 + st X+, r15 ;store nonce address in SRAM_NONCE_ADDR + st X+, r14 + st X+, r13 ;store key address in SRAM_KEY_ADDR + st X+, r12 + mov radlen, r16 + mov mclen, r20 + + rcall Initialization + + ldi rn, NR_i + ldi rate, RATE_INBYTES + ldi AEDH, 0b000 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C) + lds YH, SRAM_ASSOCIATED_DATA_ADDR + lds YL, SRAM_ASSOCIATED_DATA_ADDR + 1 + rcall AUTH + rcall AddDomain + ldi AEDH, 0b001 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C) + lds YH, SRAM_MESSAGE_IN_ADDR + lds YL, SRAM_MESSAGE_IN_ADDR + 1 + lds ZH, SRAM_MESSAGE_OUT_ADDR + lds ZL, SRAM_MESSAGE_OUT_ADDR + 1 + rcall ENC + rcall Finalization + POP_ALL +ret + +; int crypto_aead_decrypt_asm( +; unsigned char *m, +; const unsigned char *c, +; unsigned long long clen, +; const unsigned char *ad, +; unsigned long long adlen, +; const unsigned char *npub, +; const
unsigned char *k +; ) +; +; unsigned char *m, is passed in r24:r25 +; const unsigned char *c, is passed in r22:r23 +; unsigned long long clen, is passed in r20:r21, only LSB (r20) is used +; const unsigned char *ad, is passed in r18:r19 +; unsigned long long adlen, is passed in r16:r17, only LSB (r16) is used +; const unsigned char *npub, is passed in r14:r15 +; const unsigned char *k is passed in r12:r13 +.global crypto_aead_decrypt_asm +crypto_aead_decrypt_asm: + PUSH_ALL + ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR) + ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR) + st X+, r25 ;store message address in SRAM_MESSAGE_OUT_ADDR + st X+, r24 + st X+, r23 ;store cipher address in SRAM_MESSAGE_IN_ADDR + st X+, r22 + st X+, r21 ;store cipher length in SRAM_MESSAGE_IN_LEN + st X+, r20 + st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR + st X+, r18 + st X+, r17 ;store associated data length in SRAM_ADLEN + st X+, r16 + st X+, r15 ;store nonce address in SRAM_NONCE_ADDR + st X+, r14 + st X+, r13 ;store key address in SRAM_KEY_ADDR + st X+, r12 + mov radlen, r16 + mov mclen, r20 + + rcall Initialization + + ldi rn, NR_i + ldi rate, RATE_INBYTES + ldi AEDH, 0b000 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C) + lds YH, SRAM_ASSOCIATED_DATA_ADDR + lds YL, SRAM_ASSOCIATED_DATA_ADDR + 1 + rcall AUTH + rcall AddDomain + ldi AEDH, 0b011 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C) + lds YH, SRAM_MESSAGE_IN_ADDR + lds YL, SRAM_MESSAGE_IN_ADDR + 1 + lds ZH, SRAM_MESSAGE_OUT_ADDR + lds ZL, SRAM_MESSAGE_OUT_ADDR + 1 + rcall ENC + + ldi ZH, hi8(SRAM_ADDITIONAL) + ldi ZL, lo8(SRAM_ADDITIONAL) + rcall Finalization + + sbiw ZL, CRYPTO_ABYTES + ldi cnt0, CRYPTO_ABYTES +compare_tag: + ld tmp0, Z+ + ld tmp1, Y+ + cp tmp0, tmp1 + brne return_tag_not_match + dec cnt0 + brne compare_tag + rjmp return_tag_match + +return_tag_not_match: + ldi r25, 0xFF + ldi r24, 0xFF + rjmp crypto_aead_decrypt_end + +return_tag_match: + clr 
r25 + clr r24 +crypto_aead_decrypt_end: + POP_ALL +ret + +; #ifdef CRYPTO_AEAD +#endif + + +#ifdef CRYPTO_HASH + +; void crypto_hash_asm( +; unsigned char *out, +; const unsigned char *in, +; unsigned long long inlen +; ) +; +; unsigned char *out, is passed in r24:r25 +; const unsigned char *in, is passed in r22:r23 +; unsigned long long inlen, is passed in r20:r21, only LSB (r20) is used +.global crypto_hash_asm +crypto_hash_asm: + PUSH_ALL + ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR) + ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR) + st X+, r25 ;store message address in SRAM_MESSAGE_OUT_ADDR + st X+, r24 + st X+, r23 ;store cipher address in SRAM_MESSAGE_IN_ADDR + st X+, r22 + st X+, r21 ;store cipher length in SRAM_MESSAGE_IN_LEN + st X+, r20 + mov mclen, r20 + + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) +#if (STATE_INBITS==384) && (HASH_RATE_INBITS==128) + ldi cnt0, STATE_INBYTES - 1 +#else + ldi cnt0, STATE_INBYTES +#endif + clr tmp0 +zero_state: + st X+, tmp0 + dec cnt0 + brne zero_state + +#if (STATE_INBITS==384) && (HASH_RATE_INBITS==128) + ldi tmp0, S384_R192_BITS + st X+, tmp0 +#endif + + ldi rn, NR_h + ldi AEDH, 0b100 + +HASH_ABSORBING: + mov radlen, mclen + tst radlen + breq EMPTY_M + ldi rate, HASH_RATE_INBYTES + lds YH, SRAM_MESSAGE_IN_ADDR + lds YL, SRAM_MESSAGE_IN_ADDR + 1 + rcall AUTH + rjmp HASH_SQUEEZING + +EMPTY_M: + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + ldi tmp0, PAD_BITS + ld tmp1, X + eor tmp1, tmp0 + st X, tmp1 + rcall Permutation + +HASH_SQUEEZING: + ldi rate, HASH_SQUEEZE_RATE_INBYTES + lds ZH, SRAM_MESSAGE_OUT_ADDR + lds ZL, SRAM_MESSAGE_OUT_ADDR + 1 + ldi tcnt, CRYPTO_BYTES +SQUEEZING_loop: + rcall EXTRACT_from_State + subi tcnt, HASH_SQUEEZE_RATE_INBYTES + breq HASH_SQUEEZING_end + rcall Permutation + rjmp SQUEEZING_loop +HASH_SQUEEZING_end: + POP_ALL +ret + +#endif + + +; Byte Order In AVR 8: +; KNOT-AEAD(128, 256, 64): +; N[ 0] AEAD_State[ 0] | Message[ 0] Perm_row_0[0] 0 Tag[ 0] +; N[ 1] AEAD_State[ 1] | Message[ 1] Perm_row_0[1] 0 
Tag[ 1] +; N[ 2] AEAD_State[ 2] | Message[ 2] Perm_row_0[2] 0 Tag[ 2] +; N[ 3] AEAD_State[ 3] | Message[ 3] Perm_row_0[3] 0 Tag[ 3] +; N[ 4] AEAD_State[ 4] | Message[ 4] 0x01 Perm_row_0[4] 0 Tag[ 4] +; N[ 5] AEAD_State[ 5] | Message[ 5] 0x00 Perm_row_0[5] 0 Tag[ 5] +; N[ 6] AEAD_State[ 6] | Message[ 6] 0x00 Perm_row_0[6] 0 Tag[ 6] +; N[ 7] AEAD_State[ 7] | Message[ 7] 0x00 Perm_row_0[7] <<< 0 Tag[ 7] +; N[ 8] AEAD_State[ 8] | Perm_row_1[0] 1 +; N[ 9] AEAD_State[ 9] | Perm_row_1[1] 1 +; N[10] AEAD_State[10] | Perm_row_1[2] 1 +; N[11] AEAD_State[11] | Perm_row_1[3] 1 +; N[12] AEAD_State[12] | Perm_row_1[4] 1 +; N[13] AEAD_State[13] | Perm_row_1[5] 1 +; N[14] AEAD_State[14] | Perm_row_1[6] 1 +; N[15] AEAD_State[15] | Perm_row_1[7] <<< 1 +; K[ 0] AEAD_State[16] | Perm_row_2[0] 8 +; K[ 1] AEAD_State[17] | Perm_row_2[1] 8 +; K[ 2] AEAD_State[18] | Perm_row_2[2] 8 +; K[ 3] AEAD_State[19] | Perm_row_2[3] 8 +; K[ 4] AEAD_State[20] | Perm_row_2[4] 8 +; K[ 5] AEAD_State[21] | Perm_row_2[5] 8 +; K[ 6] AEAD_State[22] | Perm_row_2[6] 8 +; K[ 7] AEAD_State[23] | Perm_row_2[7] <<< 8 +; K[ 8] AEAD_State[24] | Perm_row_3[0] 25 +; K[ 9] AEAD_State[25] | Perm_row_3[1] 25 +; K[10] AEAD_State[26] | Perm_row_3[2] 25 +; K[11] AEAD_State[27] | Perm_row_3[3] 25 +; K[12] AEAD_State[28] | Perm_row_3[4] 25 +; K[13] AEAD_State[29] | Perm_row_3[5] 25 +; K[14] AEAD_State[30] | Perm_row_3[6] 25 +; K[15] AEAD_State[31] | ^0x80 Perm_row_3[7] <<< 25 +; +; +; KNOT-AEAD(128, 384, 192): +; Initalization +; N[ 0] AEAD_State[ 0] | Message[ 0] Perm_row_0[ 0] 0 Tag[ 0] +; N[ 1] AEAD_State[ 1] | Message[ 1] Perm_row_0[ 1] 0 Tag[ 1] +; N[ 2] AEAD_State[ 2] | Message[ 2] Perm_row_0[ 2] 0 Tag[ 2] +; N[ 3] AEAD_State[ 3] | Message[ 3] Perm_row_0[ 3] 0 Tag[ 3] +; N[ 4] AEAD_State[ 4] | Message[ 4] 0x01 Perm_row_0[ 4] 0 Tag[ 4] +; N[ 5] AEAD_State[ 5] | Message[ 5] 0x00 Perm_row_0[ 5] 0 Tag[ 5] +; N[ 6] AEAD_State[ 6] | Message[ 6] 0x00 Perm_row_0[ 6] 0 Tag[ 6] +; N[ 7] AEAD_State[ 7] | Message[ 7] 0x00 
Perm_row_0[ 7] 0 Tag[ 7] +; N[ 8] AEAD_State[ 8] | Message[ 8] 0x00 Perm_row_0[ 8] 0 Tag[ 8] +; N[ 9] AEAD_State[ 9] | Message[ 9] 0x00 Perm_row_0[ 9] 0 Tag[ 9] +; N[10] AEAD_State[10] | Message[10] 0x00 Perm_row_0[10] 0 Tag[10] +; N[11] AEAD_State[11] | Message[11] 0x00 Perm_row_0[11] <<< 0 Tag[11] +; N[12] AEAD_State[12] | Message[12] 0x00 Perm_row_1[ 0] 1 Tag[12] +; N[13] AEAD_State[13] | Message[13] 0x00 Perm_row_1[ 1] 1 Tag[13] +; N[14] AEAD_State[14] | Message[14] 0x00 Perm_row_1[ 2] 1 Tag[14] +; N[15] AEAD_State[15] | Message[15] 0x00 Perm_row_1[ 3] 1 Tag[15] +; K[ 0] AEAD_State[16] | Message[16] 0x00 Perm_row_1[ 4] 1 +; K[ 1] AEAD_State[17] | Message[17] 0x00 Perm_row_1[ 5] 1 +; K[ 2] AEAD_State[18] | Message[18] 0x00 Perm_row_1[ 6] 1 +; K[ 3] AEAD_State[19] | Message[19] 0x00 Perm_row_1[ 7] 1 +; K[ 4] AEAD_State[20] | Message[20] 0x00 Perm_row_1[ 8] 1 +; K[ 5] AEAD_State[21] | Message[21] 0x00 Perm_row_1[ 9] 1 +; K[ 6] AEAD_State[22] | Message[22] 0x00 Perm_row_1[10] 1 +; K[ 7] AEAD_State[23] | Message[23] 0x00 Perm_row_1[11] <<< 1 +; K[ 8] AEAD_State[24] | Perm_row_2[ 0] 8 +; K[ 9] AEAD_State[25] | Perm_row_2[ 1] 8 +; K[10] AEAD_State[26] | Perm_row_2[ 2] 8 +; K[11] AEAD_State[27] | Perm_row_2[ 3] 8 +; K[12] AEAD_State[28] | Perm_row_2[ 4] 8 +; K[13] AEAD_State[29] | Perm_row_2[ 5] 8 +; K[14] AEAD_State[30] | Perm_row_2[ 6] 8 +; K[15] AEAD_State[31] | Perm_row_2[ 7] 8 +; 0x00 AEAD_State[32] | Perm_row_2[ 8] 8 +; 0x00 AEAD_State[33] | Perm_row_2[ 9] 8 +; 0x00 AEAD_State[34] | Perm_row_2[10] 8 +; 0x00 AEAD_State[35] | Perm_row_2[11] <<< 8 +; 0x00 AEAD_State[36] | Perm_row_3[ 0] 55 +; 0x00 AEAD_State[37] | Perm_row_3[ 1] 55 +; 0x00 AEAD_State[38] | Perm_row_3[ 2] 55 +; 0x00 AEAD_State[39] | Perm_row_3[ 3] 55 +; 0x00 AEAD_State[40] | Perm_row_3[ 4] 55 +; 0x00 AEAD_State[41] | Perm_row_3[ 5] 55 +; 0x00 AEAD_State[42] | Perm_row_3[ 6] 55 +; 0x00 AEAD_State[43] | Perm_row_3[ 7] 55 +; 0x00 AEAD_State[44] | Perm_row_3[ 8] 55 +; 0x00 AEAD_State[45] | Perm_row_3[ 9] 
55 +; 0x00 AEAD_State[46] | Perm_row_3[10] 55 +; 0x00 ^0x80 AEAD_State[47] | ^0x80 Perm_row_3[11] <<< 55 diff --git a/knot/Implementations/crypto_hash/knot256v2/avr8_lowrom/hash.c b/knot/Implementations/crypto_hash/knot256v2/avr8_lowrom/hash.c new file mode 100644 index 0000000..dbbe4df --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v2/avr8_lowrom/hash.c @@ -0,0 +1,32 @@ +#include +#include +#include +#include +#include "api.h" +#include "crypto_hash.h" + +extern void crypto_hash_asm( + unsigned char *out, + const unsigned char *in, + unsigned char inlen + ); + +int crypto_hash( + unsigned char *out, + const unsigned char *in, + unsigned long long inlen +) +{ + /* + ... + ... the code for the hash function implementation goes here + ... generating a hash value out[0],out[1],...,out[CRYPTO_BYTES-1] + ... from a message in[0],in[1],...,in[in-1] + ... + ... return 0; + */ + + crypto_hash_asm(out, in, inlen); + + return 0; +} \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot256v2/avr8_lowrom/knot256.h b/knot/Implementations/crypto_hash/knot256v2/avr8_lowrom/knot256.h new file mode 100644 index 0000000..d16bf8c --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v2/avr8_lowrom/knot256.h @@ -0,0 +1,197 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.0 2020 by KNOT Team * +; ********************************************** +; +#include "assist.h" + +Permutation: + PUSH_CONFLICT + mov rcnt, rn + + ldi rc, 0x01 + ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES) + ld x30, Y+ + ld x31, Y+ + ld x32, Y+ + ld x33, Y+ + ld x34, Y+ + ld x35, Y+ + ld x36, Y+ + ld x37, Y+ + +round_loop_start: + rjmp AddRC_SubColumns_Start + +load_columns_table: + rjmp load_column0 + rjmp load_column1 + rjmp load_column2 + 
rjmp load_column3 + rjmp load_column4 + rjmp load_column5 + rjmp load_column6 + rjmp load_column7 + rjmp amend_shiftRow + +load_column0: + mov x3j, x30 + rjmp Sbox_one_column +load_column1: + mov x30, x3j + mov x3j, x31 + rjmp Sbox_one_column +load_column2: + mov x31, x3j + mov x3j, x32 + rjmp Sbox_one_column +load_column3: + mov x32, x3j + mov x3j, x33 + rjmp Sbox_one_column +load_column4: + mov x33, x3j + mov x3j, x34 + rjmp Sbox_one_column +load_column5: + mov x34, x3j + mov x3j, x35 + rjmp Sbox_one_column +load_column6: + mov x35, x3j + mov x3j, x36 + rjmp Sbox_one_column +load_column7: + mov x36, x3j + mov x3j, x37 + rjmp Sbox_one_column + +#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) +LFSR_table: + rjmp LFSR6 + rjmp LFSR7 +LFSR6: + LFSR6_MACRO + rjmp LFSR_DONE +LFSR7: + LFSR7_MACRO + rjmp LFSR_DONE +#endif + +;;;;;;;;;;;;;;;;;;;;;;;; Real Start +AddRC_SubColumns_Start: + ldi YH, hi8(SRAM_STATE) + ldi YL, lo8(SRAM_STATE) + clr ccnt + ld x0j, Y + eor x0j, rc + +#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) + ldi ZL, pm_lo8(LFSR_table) + ldi ZH, pm_hi8(LFSR_table) + sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[1] = 1 for HASH + adiw ZL, 1 + ijmp +LFSR_DONE: +#elif defined(CRYPTO_AEAD) + LFSR6_MACRO ; only AEAD +#else + LFSR7_MACRO ; only HASH +#endif + + ldd x1j, Y + ROW_INBYTES + ldd x2j, Y + 2 * ROW_INBYTES + ldi ZL, pm_lo8(load_columns_table) + ldi ZH, pm_hi8(load_columns_table) + ijmp +Sbox_one_column: + Sbox x0j, x1j, x2j, x3j + + ; 7 6 5 4 3 2 1 0 + ; -- -- -- -- -- -- -- x- 0 + ; -- -- -- -- -- -- -- x' 0 + ; -- -- -- -- -- -- x- -- 1 + ; -- -- -- -- x' -- -- -- 3 + ; 4 3 2 1 0 7 6 5 + ; Store a byte to Row 0 + st Y, x0j + ; Store a byte combined with ShiftRow1 + lsl t1j + mov t1j, x1j ; back up the last updated byte in t1j, to be used in shiftRow1 (1 bit left) + rol x1j + std Y + ROW_INBYTES, x1j + ; Store a byte combined with ShiftRow2 + inc ccnt + cpi ccnt, ROW_INBYTES + breq ROW2_WRAP + ldd t2j, Y + 2 * ROW_INBYTES + 1 ; load next byte, the 
last updated byte needed to be shifted to the address of the next bytes + std Y + 2 * ROW_INBYTES + 1, x2j + mov x2j, t2j + jmp NO_ROW2_WRAP +ROW2_WRAP: + std Y + ROW_INBYTES + 1, x2j + ; remain ShiftRow3 to be done at 'amend_shiftRow' +NO_ROW2_WRAP: + adiw YL, 1 + ld x0j, Y + ldd x1j, Y + ROW_INBYTES + + adiw ZL, 1 + ijmp + +amend_shiftRow: + ldi YH, hi8(SRAM_STATE + ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + ROW_INBYTES) + + ld x1j, Y + bst t1j, 7 + bld x1j, 0 + st Y, x1j + + ; <<< 1 + mov x37, x3j + rol x3j + rol x30 + rol x31 + rol x32 + rol x33 + rol x34 + rol x35 + rol x36 + rol x37 + ; <<< 24 + ; 7 6 5 4 3 2 1 0 => 4 3 2 1 0 7 6 5 + mov x3j, x30 + mov x30, x35 + mov x35, x32 + mov x32, x37 + mov x37, x34 + mov x34, x31 + mov x31, x36 + mov x36, x33 + mov x33, x3j + + dec rcnt + breq round_loop_end + rjmp round_loop_start + +round_loop_end: + ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES) + st Y+, x30 + st Y+, x31 + st Y+, x32 + st Y+, x33 + st Y+, x34 + st Y+, x35 + st Y+, x36 + st Y+, x37 + + POP_CONFLICT +ret \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot256v2/avr8_lowrom/knot384.h b/knot/Implementations/crypto_hash/knot256v2/avr8_lowrom/knot384.h new file mode 100644 index 0000000..65c474a --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v2/avr8_lowrom/knot384.h @@ -0,0 +1,219 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.0 2020 by KNOT Team * +; ********************************************** +; +#include "assist.h" + +Permutation: + PUSH_CONFLICT + mov rcnt, rn + + ldi rc, 0x01 + ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES) + ld x30, Y+ + ld x31, Y+ + ld x32, Y+ + ld x33, Y+ + ld x34, Y+ + ld x35, Y+ + ld x36, Y+ + ld x37, Y+ + ld x38, Y+ + ld 
x39, Y+ + ld x3a, Y+ + ld x3b, Y+ + +round_loop_start: + rjmp AddRC_SubColumns_Start + +load_columns_table: + rjmp load_column0 + rjmp load_column1 + rjmp load_column2 + rjmp load_column3 + rjmp load_column4 + rjmp load_column5 + rjmp load_column6 + rjmp load_column7 + rjmp load_column8 + rjmp load_column9 + rjmp load_columna + rjmp load_columnb + rjmp amend_shiftRow + +load_column0: + mov x3j, x30 + rjmp Sbox_one_column +load_column1: + mov x30, x3j + mov x3j, x31 + rjmp Sbox_one_column +load_column2: + mov x31, x3j + mov x3j, x32 + rjmp Sbox_one_column +load_column3: + mov x32, x3j + mov x3j, x33 + rjmp Sbox_one_column +load_column4: + mov x33, x3j + mov x3j, x34 + rjmp Sbox_one_column +load_column5: + mov x34, x3j + mov x3j, x35 + rjmp Sbox_one_column +load_column6: + mov x35, x3j + mov x3j, x36 + rjmp Sbox_one_column +load_column7: + mov x36, x3j + mov x3j, x37 + rjmp Sbox_one_column +load_column8: + mov x37, x3j + mov x3j, x38 + rjmp Sbox_one_column +load_column9: + mov x38, x3j + mov x3j, x39 + rjmp Sbox_one_column +load_columna: + mov x39, x3j + mov x3j, x3a + rjmp Sbox_one_column +load_columnb: + mov x3a, x3j + mov x3j, x3b + rjmp Sbox_one_column + +;;;;;;;;;;;;;;;;;;;;;;;; Real Start +AddRC_SubColumns_Start: + ldi YH, hi8(SRAM_STATE) + ldi YL, lo8(SRAM_STATE) + ldi ZL, pm_lo8(load_columns_table) + ldi ZH, pm_hi8(load_columns_table) + clr ccnt + ld x0j, Y + eor x0j, rc + LFSR7_MACRO + + ldd x1j, Y + ROW_INBYTES + ldd x2j, Y + 2 * ROW_INBYTES + ijmp +Sbox_one_column: + Sbox x0j, x1j, x2j, x3j + + ; b a 9 8 7 6 5 4 3 2 1 0 + ; -- -- -- -- -- -- -- -- -- -- -- x- 0 + ; -- -- -- -- -- -- -- -- -- -- -- x' 0 + ; -- -- -- -- -- -- -- -- -- -- x- -- 1 + ; -- -- -- -- x' -- -- -- -- -- -- -- 7 + ; 4 3 2 1 0 b a 9 8 7 6 5 + ; Store a byte to Row 0 + st Y, x0j + ; Store a byte combined with ShiftRow 1 + lsl t1j + mov t1j, x1j ; back up the last updated byte in t1j, to be used in shiftRow1 (1 bit left) + rol x1j + std Y + ROW_INBYTES, x1j + ; Store a byte combined 
with ShiftRow 2 + inc ccnt + cpi ccnt, ROW_INBYTES + breq ROW2_WRAP + ldd t2j, Y + 2 * ROW_INBYTES + 1 ; load next byte, the last updated byte needed to be shifted to the address of the next bytes + std Y + 2 * ROW_INBYTES + 1, x2j + mov x2j, t2j + jmp NO_ROW2_WRAP +ROW2_WRAP: + std Y + ROW_INBYTES + 1, x2j + ; remain ShiftRow3 to be done at 'amend_shiftRow' +NO_ROW2_WRAP: + adiw YL, 1 + ld x0j, Y + ldd x1j, Y + ROW_INBYTES + + adiw ZL, 1 + ijmp + +amend_shiftRow: + ldi YH, hi8(SRAM_STATE + ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + ROW_INBYTES) + + ld x1j, Y + bst t1j, 7 + bld x1j, 0 + st Y, x1j + + ; >>> 1 + mov x3b, x3j + ror x3j + ror x3a + ror x39 + ror x38 + ror x37 + ror x36 + ror x35 + ror x34 + ror x33 + ror x32 + ror x31 + ror x30 + ror x3b + ; <<< 56 + ; b a 9 8 7 6 5 4 3 2 1 0 => 4 3 2 1 0 b a 9 8 7 6 5 + ;mov x3j, x30 + ;mov x30, x35 + ;mov x35, x32 + ;mov x32, x37 + ;mov x37, x34 + ;mov x34, x31 + ;mov x31, x36 + ;mov x36, x33 + ;mov x33, x3j + mov x3j, x30 + mov x30, x35 + mov x35, x3a + mov x3a, x33 + mov x33, x38 + mov x38, x31 + mov x31, x36 + mov x36, x3b + mov x3b, x34 + mov x34, x39 + mov x39, x32 + mov x32, x37 + mov x37, x3j + + dec rcnt + breq round_loop_end + rjmp round_loop_start + +round_loop_end: + + ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES) + st Y+, x30 + st Y+, x31 + st Y+, x32 + st Y+, x33 + st Y+, x34 + st Y+, x35 + st Y+, x36 + st Y+, x37 + st Y+, x38 + st Y+, x39 + st Y+, x3a + st Y+, x3b + + POP_CONFLICT +ret \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot256v2/avr8_lowrom/knot512.h b/knot/Implementations/crypto_hash/knot256v2/avr8_lowrom/knot512.h new file mode 100644 index 0000000..d24b353 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v2/avr8_lowrom/knot512.h @@ -0,0 +1,275 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * 
+; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.0 2020 by KNOT Team * +; ********************************************** +; +#include "assist.h" + +Permutation: + PUSH_CONFLICT + mov rcnt, rn + + ldi rc, 0x01 + ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES) + ld x30, Y+ + ld x31, Y+ + ld x32, Y+ + ld x33, Y+ + ld x34, Y+ + ld x35, Y+ + ld x36, Y+ + ld x37, Y+ + ld x38, Y+ + ld x39, Y+ + ld x3a, Y+ + ld x3b, Y+ + ld x3c, Y+ + ld x3d, Y+ + ld x3e, Y+ + ld x3f, Y+ + +round_loop_start: + rjmp AddRC_SubColumns_Start + +load_columns_table: + rjmp load_column0 + rjmp load_column1 + rjmp load_column2 + rjmp load_column3 + rjmp load_column4 + rjmp load_column5 + rjmp load_column6 + rjmp load_column7 + rjmp load_column8 + rjmp load_column9 + rjmp load_columna + rjmp load_columnb + rjmp load_columnc + rjmp load_columnd + rjmp load_columne + rjmp load_columnf + rjmp amend_shiftRow + +load_column0: + mov x3j, x30 + rjmp Sbox_one_column +load_column1: + mov x30, x3j + mov x3j, x31 + rjmp Sbox_one_column +load_column2: + mov x31, x3j + mov x3j, x32 + rjmp Sbox_one_column +load_column3: + mov x32, x3j + mov x3j, x33 + rjmp Sbox_one_column +load_column4: + mov x33, x3j + mov x3j, x34 + rjmp Sbox_one_column +load_column5: + mov x34, x3j + mov x3j, x35 + rjmp Sbox_one_column +load_column6: + mov x35, x3j + mov x3j, x36 + rjmp Sbox_one_column +load_column7: + mov x36, x3j + mov x3j, x37 + rjmp Sbox_one_column +load_column8: + mov x37, x3j + mov x3j, x38 + rjmp Sbox_one_column +load_column9: + mov x38, x3j + mov x3j, x39 + rjmp Sbox_one_column +load_columna: + mov x39, x3j + mov x3j, x3a + rjmp Sbox_one_column +load_columnb: + mov x3a, x3j + mov x3j, x3b + rjmp Sbox_one_column +load_columnc: + mov x3b, x3j + mov x3j, x3c + rjmp Sbox_one_column +load_columnd: + mov x3c, x3j + mov x3j, x3d + rjmp Sbox_one_column +load_columne: + mov x3d, x3j + mov x3j, x3e + rjmp Sbox_one_column +load_columnf: + mov x3e, x3j + mov x3j, x3f + 
rjmp Sbox_one_column + +#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) +LFSR_table: + rjmp LFSR7 + rjmp LFSR8 +LFSR7: + LFSR7_MACRO + rjmp LFSR_DONE +LFSR8: + LFSR8_MACRO + rjmp LFSR_DONE +#endif + +;;;;;;;;;;;;;;;;;;;;;;;; Real Start +AddRC_SubColumns_Start: + ldi YH, hi8(SRAM_STATE) + ldi YL, lo8(SRAM_STATE) + clr ccnt + ld x0j, Y + eor x0j, rc + +#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) + ldi ZL, pm_lo8(LFSR_table) + ldi ZH, pm_hi8(LFSR_table) + sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[1] = 1 for HASH + adiw ZL, 1 + ijmp +LFSR_DONE: +#elif defined(CRYPTO_AEAD) + LFSR7_MACRO ; only AEAD +#else + LFSR8_MACRO ; only HASH +#endif + + ldd x1j, Y + ROW_INBYTES + ldd x2j, Y + 2 * ROW_INBYTES + ldd t2j, Y + 2 * ROW_INBYTES + 1 + ldi ZL, pm_lo8(load_columns_table) + ldi ZH, pm_hi8(load_columns_table) + ijmp +Sbox_one_column: + Sbox x0j, x1j, x2j, x3j + + ; f e d c b a 9 8 7 6 5 4 3 2 1 0 + ; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x- 0 + ; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x' 0 + ; -- -- -- -- -- -- -- -- -- -- -- -- -- x- -- -- 2 + ; -- -- -- -- -- -- -- -- -- -- -- -- x' -- -- -- 3 + ; c b a 9 8 7 6 5 4 3 2 1 0 f e d + ; Store a byte to Row 0 + st Y, x0j + ; Store a byte combined with ShiftRow1 + lsl t1j + mov t1j, x1j ; back up the last updated byte in t1j, to be used in shiftRow1 (1 bit left) + rol x1j + std Y + ROW_INBYTES, x1j + ; Store a byte combined with ShiftRow2 + inc ccnt + cpi ccnt, ROW_INBYTES - 1 + brsh ROW2_WRAP + ldd tmp0, Y + 2 * ROW_INBYTES + 2 ; load next byte, the last updated byte needed to be shifted to the address of the next bytes + std Y + 2 * ROW_INBYTES + 2, x2j + mov x2j, t2j + mov t2j, tmp0 + jmp NO_ROW2_WRAP +ROW2_WRAP: + std Y + ROW_INBYTES + 2, x2j + mov x2j, t2j + + ; remain ShiftRow3 to be done at 'amend_shiftRow' +NO_ROW2_WRAP: + adiw YL, 1 + ld x0j, Y + ldd x1j, Y + ROW_INBYTES + + adiw ZL, 1 + ijmp + +amend_shiftRow: + ldi YH, hi8(SRAM_STATE + ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + ROW_INBYTES) + + ld 
x1j, Y + bst t1j, 7 + bld x1j, 0 + st Y, x1j + + ; <<< 1 + mov x3f, x3j + rol x3j + rol x30 + rol x31 + rol x32 + rol x33 + rol x34 + rol x35 + rol x36 + rol x37 + rol x38 + rol x39 + rol x3a + rol x3b + rol x3c + rol x3d + rol x3e + rol x3f + ; <<< 24 + ; f e d c b a 9 8 7 6 5 4 3 2 1 0 => + ; c b a 9 8 7 6 5 4 3 2 1 0 f e d + mov x3j, x30 + mov x30, x3d + mov x3d, x3a + mov x3a, x37 + mov x37, x34 + mov x34, x31 + mov x31, x3e + mov x3e, x3b + mov x3b, x38 + mov x38, x35 + mov x35, x32 + mov x32, x3f + mov x3f, x3c + mov x3c, x39 + mov x39, x36 + mov x36, x33 + mov x33, x3j + + dec rcnt + breq round_loop_end + rjmp round_loop_start + +round_loop_end: + + ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES) + st Y+, x30 + st Y+, x31 + st Y+, x32 + st Y+, x33 + st Y+, x34 + st Y+, x35 + st Y+, x36 + st Y+, x37 + st Y+, x38 + st Y+, x39 + st Y+, x3a + st Y+, x3b + st Y+, x3c + st Y+, x3d + st Y+, x3e + st Y+, x3f + + POP_CONFLICT +ret \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot256v2/avr8_lowrom/permutation.h b/knot/Implementations/crypto_hash/knot256v2/avr8_lowrom/permutation.h new file mode 100644 index 0000000..a57c5d3 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot256v2/avr8_lowrom/permutation.h @@ -0,0 +1,109 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.0 2020 by KNOT Team * +; ********************************************** +; + +; +; ============================================ +; R E G I S T E R D E F I N I T I O N S +; ============================================ +; + +#define mclen r16 +#define radlen r17 +#define tcnt r17 +#define tmp0 r20 +#define tmp1 r21 +#define cnt0 r22 +#define rn r23 +#define rate r24 + +; +; ; AEDH = 0b000: for authenticate AD +; ; AEDH = 0b001: for encryption 
+; ; AEDH = 0b011: for decryption +; ; AEDH = 0b100: for hash +; #define AEDH r25 ; Register used globally within this program +; +; #define x30 r0 ; Register used without overlapping +; #define x31 r1 ; Register used without overlapping +; #define x32 r2 ; Register used without overlapping +; #define x33 r3 ; Register used without overlapping +; #define x34 r4 ; Register used without overlapping +; #define x35 r5 ; Register used without overlapping +; #define x36 r6 ; Register used without overlapping +; #define x37 r7 ; Register used without overlapping +; #define x38 r8 ; Register used without overlapping +; #define x39 r9 ; Register used without overlapping +; #define x3a r10 ; Register used without overlapping +; #define x3b r11 ; Register used without overlapping +; #define x3c r12 ; Register used without overlapping +; #define x3d r13 ; Register used without overlapping +; #define x3e r14 ; Register used without overlapping +; #define x3f r15 ; Register used without overlapping +; +; #define x0j r16 ; Register used overlapped, should be backed up before using +; #define x1j r17 ; Register used overlapped, should be backed up before using +; #define x2j r18 ; Register used overlapped, should be backed up before using +; #define x3j r19 ; Register used overlapped, should be backed up before using +; +; ; t2j used in knot512 to keep one byte in Row2 (because of rotating 16-bit), +; ; will not be interupt with LFSR which uses the overlapped register tmp1 +; #define t2j r21 ; Temporary register, used freely +; #define t1j r22 ; Temporary register, used freely +; #define t3j r23 ; Temporary register, used freely +; +; #define rc r24 ; Register used overlapped, should be backed up before using +; #define rcnt r26 ; Register used overlapped, should be backed up before using +; #define ccnt r27 ; Register used overlapped, should be backed up before using + +#define AEDH r25 +#define x30 r0 +#define x31 r1 +#define x32 r2 +#define x33 r3 +#define x34 r4 +#define x35 
r5 +#define x36 r6 +#define x37 r7 +#define x38 r8 +#define x39 r9 +#define x3a r10 +#define x3b r11 +#define x3c r12 +#define x3d r13 +#define x3e r14 +#define x3f r15 + +#define x0j r16 +#define x1j r17 +#define x2j r18 +#define x3j r19 + +; t2j used in knot512 to keep one byte in Row2 (because of rotating 16-bit), +; will not be interupt with LFSR which uses the overlapped register tmp1 +#define t2j r21 +#define t1j r22 +#define t3j r23 + +#define rc r24 +#define rcnt r26 +#define ccnt r27 + +#if (STATE_INBITS==256) +#include "knot256.h" +#elif (STATE_INBITS==384) +#include "knot384.h" +#elif (STATE_INBITS==512) +#include "knot512.h" +#else +#error "Not specified key size and state size" +#endif + + diff --git a/knot/Implementations/crypto_hash/knot384/avr8_lowrom/api.h b/knot/Implementations/crypto_hash/knot384/avr8_lowrom/api.h new file mode 100644 index 0000000..1656d0c --- /dev/null +++ b/knot/Implementations/crypto_hash/knot384/avr8_lowrom/api.h @@ -0,0 +1 @@ +#define CRYPTO_BYTES 48 \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot384/avr8_lowrom/assist.h b/knot/Implementations/crypto_hash/knot384/avr8_lowrom/assist.h new file mode 100644 index 0000000..cb903a5 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot384/avr8_lowrom/assist.h @@ -0,0 +1,140 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.0 2020 by KNOT Team * +; ********************************************** +; +.macro LFSR6_MACRO + bst rc, 5 + bld tmp0, 0 + bst rc, 4 + bld tmp1, 0 + eor tmp0, tmp1 + ror tmp0 + rol rc + andi rc, 0x3F +.endm + +.macro LFSR7_MACRO + bst rc, 6 + bld tmp0, 0 + bst rc, 5 + bld tmp1, 0 + eor tmp0, tmp1 + ror tmp0 + rol rc + andi rc, 0x7F +.endm + +.macro LFSR8_MACRO + bst rc, 7 + bld tmp0, 0 + bst rc, 5 + bld tmp1, 0 + eor tmp0, tmp1 + bst rc, 
4 + bld tmp1, 0 + eor tmp0, tmp1 + bst rc, 3 + bld tmp1, 0 + eor tmp0, tmp1 + ror tmp0 + rol rc +.endm + +.macro Sbox i0, i1, i2, i3 + mov tmp0, \i1 + com \i0 + and \i1, \i0 + eor \i1, \i2 + or \i2, tmp0 + eor \i0, \i3 + eor \i2, \i0 + eor tmp0, \i3 + and \i0, \i1 + eor \i3, \i1 + eor \i0, tmp0 + and tmp0, \i2 + eor \i1, tmp0 +.endm + +.macro PUSH_CONFLICT + push r16 + push r17 + push r18 + push r19 + + push r23 + push r24 + + push r26 + push r27 + push r28 + push r29 + push r30 + push r31 +.endm + +.macro POP_CONFLICT + pop r31 + pop r30 + pop r29 + pop r28 + pop r27 + pop r26 + + pop r24 + pop r23 + + pop r19 + pop r18 + pop r17 + pop r16 +.endm + +.macro PUSH_ALL + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r28 + push r29 +.endm + +.macro POP_ALL + pop r29 + pop r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + clr r1 +.endm \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot384/avr8_lowrom/config.h b/knot/Implementations/crypto_hash/knot384/avr8_lowrom/config.h new file mode 100644 index 0000000..c9f6bf2 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot384/avr8_lowrom/config.h @@ -0,0 +1,131 @@ +#ifndef __CONFIG_H__ +#define __CONFIG_H__ + +//#define CRYPTO_AEAD +#define CRYPTO_HASH + +#define MAX_MESSAGE_LENGTH 128 + +#define STATE_INBITS 384 +/* For CRYPTO_AEAD */ +#define CRYPTO_KEYBITS 192 +/* For CRYPTO_HASH */ +#define CRYPTO_BITS 384 + +#define STATE_INBYTES ((STATE_INBITS + 7) / 8) +#define ROW_INBITS ((STATE_INBITS + 3) / 4) +#define ROW_INBYTES ((ROW_INBITS + 7) / 8) + +/* For CRYPTO_AEAD */ +#define CRYPTO_KEYBYTES ((CRYPTO_KEYBITS + 7) / 8) +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES CRYPTO_KEYBYTES +#define CRYPTO_ABYTES CRYPTO_KEYBYTES +#define CRYPTO_NOOVERLAP 1 + 
+#define MAX_ASSOCIATED_DATA_LENGTH 32 +#define MAX_CIPHER_LENGTH (MAX_MESSAGE_LENGTH + CRYPTO_ABYTES) + +#define TAG_MATCH 0 +#define TAG_UNMATCH -1 +#define OTHER_FAILURES -2 + +/* For CRYPTO_HASH */ +#define CRYPTO_BYTES ((CRYPTO_BITS + 7) / 8) + + + +#define DOMAIN_BITS 0x80 +#define PAD_BITS 0x01 +#define S384_R192_BITS 0x80 + +#if (STATE_INBITS==256) +#define C1 1 +#define C2 8 +#define C3 25 +#elif (STATE_INBITS==384) +#define C1 1 +#define C2 8 +#define C3 55 +#elif (STATE_INBITS==512) +#define C1 1 +#define C2 16 +#define C3 25 +#else +#error "Not specified state size" +#endif + +#ifdef CRYPTO_AEAD +/* For CRYPTO_AEAD */ +#define KEY_INBITS (CRYPTO_KEYBYTES * 8) +#define KEY_INBYTES (CRYPTO_KEYBYTES) + +#define NONCE_INBITS (CRYPTO_NPUBBYTES * 8) +#define NONCE_INBYTES (CRYPTO_NPUBBYTES) + +#define TAG_INBITS (CRYPTO_ABYTES * 8) +#define TAG_INBYTES (CRYPTO_ABYTES) + +#if (KEY_INBITS==128) && (STATE_INBITS==256) +#define RATE_INBITS 64 +#define NR_0 52 +#define NR_i 28 +#define NR_f 32 +#elif (KEY_INBITS==128) && (STATE_INBITS==384) +#define RATE_INBITS 192 +#define NR_0 76 +#define NR_i 28 +#define NR_f 32 +#elif (KEY_INBITS==192) && (STATE_INBITS==384) +#define RATE_INBITS 96 +#define NR_0 76 +#define NR_i 40 +#define NR_f 44 +#elif (KEY_INBITS==256) && (STATE_INBITS==512) +#define RATE_INBITS 128 +#define NR_0 100 +#define NR_i 52 +#define NR_f 56 +#else +#error "Not specified key size and state size" +#endif + +#define RATE_INBYTES ((RATE_INBITS + 7) / 8) +#define SQUEEZE_RATE_INBYTES TAG_INBYTES + +#endif + +#ifdef CRYPTO_HASH +/* For CRYPTO_HASH */ +#define HASH_DIGEST_INBITS (CRYPTO_BYTES * 8) + +#if (HASH_DIGEST_INBITS==256) && (STATE_INBITS==256) +#define HASH_RATE_INBITS 32 +#define HASH_SQUEEZE_RATE_INBITS 128 +#define NR_h 68 +#elif (HASH_DIGEST_INBITS==256) && (STATE_INBITS==384) +#define HASH_RATE_INBITS 128 +#define HASH_SQUEEZE_RATE_INBITS 128 +#define NR_h 80 +#elif (HASH_DIGEST_INBITS==384) && (STATE_INBITS==384) +#define HASH_RATE_INBITS 
48 +#define HASH_SQUEEZE_RATE_INBITS 192 +#define NR_h 104 +#elif (HASH_DIGEST_INBITS==512) && (STATE_INBITS==512) +#define HASH_RATE_INBITS 64 +#define HASH_SQUEEZE_RATE_INBITS 256 +#define NR_h 140 +#else +#error "Not specified hash digest size and state size" +#endif + +#define HASH_RATE_INBYTES ((HASH_RATE_INBITS + 7) / 8) +#define HASH_SQUEEZE_RATE_INBYTES ((HASH_SQUEEZE_RATE_INBITS + 7) / 8) + +#endif + +#define TAG_MATCH 0 +#define TAG_UNMATCH -1 +#define OTHER_FAILURES -2 + +#endif \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot384/avr8_lowrom/crypto_hash.h b/knot/Implementations/crypto_hash/knot384/avr8_lowrom/crypto_hash.h new file mode 100644 index 0000000..342a639 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot384/avr8_lowrom/crypto_hash.h @@ -0,0 +1,13 @@ +#ifdef __cplusplus +extern "C" { +#endif + +int crypto_hash( + unsigned char *out, + const unsigned char *in, + unsigned long long inlen + ); + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot384/avr8_lowrom/encrypt.c b/knot/Implementations/crypto_hash/knot384/avr8_lowrom/encrypt.c new file mode 100644 index 0000000..baf0a3b --- /dev/null +++ b/knot/Implementations/crypto_hash/knot384/avr8_lowrom/encrypt.c @@ -0,0 +1,106 @@ +#include +#include +#include +#include +#include "config.h" + +extern void crypto_aead_encrypt_asm( + unsigned char *c, + const unsigned char *m, + unsigned char mlen, + const unsigned char *ad, + unsigned char adlen, + const unsigned char *npub, + const unsigned char *k + ); + +extern int crypto_aead_decrypt_asm( + unsigned char *m, + const unsigned char *c, + unsigned char clen, + const unsigned char *ad, + unsigned char adlen, + const unsigned char *npub, + const unsigned char *k + ); + +extern void crypto_hash_asm( + unsigned char *out, + const unsigned char *in, + unsigned char inlen + ); + + +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const 
unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k + ) +{ + /* + ... + ... the code for the cipher implementation goes here, + ... generating a ciphertext c[0],c[1],...,c[*clen-1] + ... from a plaintext m[0],m[1],...,m[mlen-1] + ... and associated data ad[0],ad[1],...,ad[adlen-1] + ... and nonce npub[0],npub[1],.. + ... and secret key k[0],k[1],... + ... the implementation shall not use nsec + ... + ... return 0; + */ + + (void)nsec; + + crypto_aead_encrypt_asm(c, m, mlen, ad, adlen, npub, k); + + *clen = mlen + TAG_INBYTES; + return 0; +} + + + +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k + ) +{ + /* + ... + ... the code for the AEAD implementation goes here, + ... generating a plaintext m[0],m[1],...,m[*mlen-1] + ... and secret message number nsec[0],nsec[1],... + ... from a ciphertext c[0],c[1],...,c[clen-1] + ... and associated data ad[0],ad[1],...,ad[adlen-1] + ... and nonce number npub[0],npub[1],... + ... and secret key k[0],k[1],... + ... + ... 
return 0; + */ + unsigned long long mlen_; + unsigned char tag_is_match; + + (void)nsec; + if (clen < CRYPTO_ABYTES) { + return -1; + } + mlen_ = clen - CRYPTO_ABYTES; + + tag_is_match = crypto_aead_decrypt_asm(m, c, mlen_, ad, adlen, npub, k); + + if (tag_is_match != 0) + { + memset(m, 0, (size_t)mlen_); + return -1; + } + + *mlen = mlen_; + return 0; +} \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot384/avr8_lowrom/encrypt_core.S b/knot/Implementations/crypto_hash/knot384/avr8_lowrom/encrypt_core.S new file mode 100644 index 0000000..cb7aed5 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot384/avr8_lowrom/encrypt_core.S @@ -0,0 +1,537 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.0 2020 by KNOT Team * +; ********************************************** +; + +; +; ============================================ +; S R A M D E F I N I T I O N S +; ============================================ +; +#include +#include "config.h" + +.section .noinit + SRAM_STATE: .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#if (STATE_INBYTES > 32) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif +#if (STATE_INBYTES > 48) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif + SRAM_MESSAGE_OUT_ADDR: .BYTE 0, 0 + SRAM_MESSAGE_IN_ADDR: .BYTE 0, 0 + SRAM_MESSAGE_IN_LEN: .BYTE 0, 0 +#ifdef CRYPTO_AEAD +; For CRYPTO_AEAD + SRAM_ASSOCIATED_DATA_ADDR: .BYTE 0, 0 + SRAM_ADLEN: .BYTE 0, 0 + SRAM_NONCE_ADDR: .BYTE 0, 0 + SRAM_KEY_ADDR: .BYTE 0, 0 + + SRAM_ADDITIONAL: + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#if (CRYPTO_ABYTES > 16) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif +#if (CRYPTO_ABYTES > 24) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif + 
.section .text

#include "permutation.h"

; XOR_to_State: process one full block of `rate` bytes against the state.
;
; require YH:YL be the address of the current associated data/cipher/message block
; for enc and dec, store ciphertext or plaintext
; require ZH:ZL be the address of the current cipher/message block
;
; For each byte: tmp1 = state ^ input. AEDH[0] selects whether that result is
; written through Z, AEDH[1] selects whether the state keeps the input byte
; (decryption) instead of the XOR result.
; Uses X, cnt0, tmp0 and tmp1; advances Y (and Z when AEDH[0] is set).
XOR_to_State:
    ldi XH, hi8(SRAM_STATE)
    ldi XL, lo8(SRAM_STATE)
    mov cnt0, rate
XOR_to_State_loop:
    ld tmp0, Y+ ; plaintext/ciphertext
    ld tmp1, X ; state
    eor tmp1, tmp0 ; ciphertext/plaintext
    sbrc AEDH, 0 ; test auth or enc/dec, if AEDH[0] == 0, skip store result
    st Z+, tmp1 ; store ciphertext/plaintext
    sbrc AEDH, 1 ; test auth/enc or dec, if AEDH[1] == 0, skip replace state byte
    mov tmp1, tmp0 ; if dec, replace state
    st X+, tmp1 ; store state byte
    dec cnt0
    brne XOR_to_State_loop
; YH:YL are now the address of the next associated data block
ret

; Pad_XOR_to_State: process the final, partial block and apply the padding.
;
; require YH:YL pointed to the input data
; require ZH:ZL pointed to the output data
; require cnt0 contains the number of bytes in source data
; require number of bytes in source data less than rate, i.e., 0 <= cnt0 < rate
;
; bit 0 of AEDH is used to distinguish (auth AD) or (enc/dec M/C):
; AEDH[0] = 0 for (auth AD), AEDH[0] = 1 for (enc/dec M/C)
; bit 1 of AEDH is used to distinguish (auth AD/enc M) or (dec C):
; AEDH[1] = 0 for (auth AD/enc M), AEDH[1] = 1 for (dec C)
; AEDH = 0b000 for (auth AD)
; AEDH = 0b001 for (enc M)
; AEDH = 0b011 for (dec C)
;
; After the cnt0 data bytes, PAD_BITS is XORed into the next state byte.
Pad_XOR_to_State:
    ldi XH, hi8(SRAM_STATE)
    ldi XL, lo8(SRAM_STATE)
    tst cnt0
    breq XOR_padded_data ; empty block: only the padding byte is applied
XOR_source_data_loop:
    ld tmp0, Y+ ; plaintext/ciphertext
    ld tmp1, X ; state
    eor tmp1, tmp0 ; ciphertext/plaintext
    sbrc AEDH, 0 ; test auth or enc/dec, if AEDH[0] == 0, skip store result
    st Z+, tmp1 ; store ciphertext/plaintext
    sbrc AEDH, 1 ; test auth/enc or dec, if AEDH[1] == 0, skip replace state byte
    mov tmp1, tmp0 ; if dec, replace state
    st X+, tmp1 ; store state byte
    dec cnt0
    brne XOR_source_data_loop
XOR_padded_data:
    ldi tmp0, PAD_BITS
    ld tmp1, X ; X points at the state byte following the data
    eor tmp1, tmp0
    st X, tmp1
ret

; AddDomain: XOR DOMAIN_BITS into the last byte of the state.
AddDomain:
    ldi XH, hi8(SRAM_STATE + STATE_INBYTES - 1)
    ldi XL, lo8(SRAM_STATE + STATE_INBYTES - 1)
    ldi tmp0, DOMAIN_BITS
    ld tmp1, X
    eor tmp0, tmp1
    st X, tmp0
ret

; EXTRACT_from_State: copy the first `rate` bytes of the state to the destination.
; require ZH:ZL be the address of the destination
; Z is advanced by `rate`; tmp1 is used as the loop counter.
EXTRACT_from_State:
    ldi XH, hi8(SRAM_STATE)
    ldi XL, lo8(SRAM_STATE)
    mov tmp1, rate
EXTRACT_from_State_loop:
    ld tmp0, X+
    st Z+, tmp0
    dec tmp1
    brne EXTRACT_from_State_loop
ret

; AUTH: absorb radlen bytes read through Y, `rate` bytes per block.
; Every block, including the final padded one, is followed by Permutation.
; Nothing is absorbed (and no permutation runs) when radlen is zero.
; The caller sets rn, rate and AEDH beforehand.
AUTH:
    tst radlen
    breq AUTH_end

    cp radlen, rate
    brlo auth_ad_padded_block

auth_ad_loop:
    rcall XOR_to_State
    rcall Permutation
    sub radlen, rate
    cp radlen, rate
    brlo auth_ad_padded_block
    rjmp auth_ad_loop

auth_ad_padded_block:
    mov cnt0, radlen ; remaining bytes, possibly zero
    rcall Pad_XOR_to_State
    rcall Permutation

AUTH_end:
ret

#ifdef CRYPTO_AEAD
; Initialization: load the nonce followed by the key into the state and run
; Permutation with NR_0 rounds. The nonce and key addresses are read from
; SRAM_NONCE_ADDR and SRAM_KEY_ADDR (high byte stored first).
Initialization:
    ldi rn, NR_0
    ldi XL, lo8(SRAM_STATE)
    ldi XH, hi8(SRAM_STATE)

    lds YH, SRAM_NONCE_ADDR
    lds YL, SRAM_NONCE_ADDR + 1
    ldi cnt0, CRYPTO_NPUBBYTES
load_nonce_loop:
    ld tmp0, Y+
    st X+, tmp0
    dec cnt0
    brne load_nonce_loop

    lds YH, SRAM_KEY_ADDR
    lds YL, SRAM_KEY_ADDR + 1
    ldi cnt0, CRYPTO_KEYBYTES
load_key_loop:
    ld tmp0, Y+
    st X+, tmp0
    dec cnt0
    brne load_key_loop

#if (STATE_INBITS==384) && (RATE_INBITS==192)
    ; nonce and key do not fill this state: zero the remainder and set
    ; S384_R192_BITS in the last byte
    ldi cnt0, (STATE_INBYTES - CRYPTO_NPUBBYTES - CRYPTO_KEYBYTES - 1)
    clr tmp0
empty_state_loop:
    st X+, tmp0
    dec cnt0
    brne empty_state_loop
    ldi tmp0, S384_R192_BITS
    st X+, tmp0
#endif

    rcall Permutation
ret

; ENC: encrypt or decrypt (selected by AEDH) mclen bytes from Y to Z.
; Full blocks are each followed by Permutation with NR_i rounds; the final
; padded block is not, because Finalization runs the next permutation.
; Nothing is done when mclen is zero.
ENC:
    tst mclen
    breq ENC_end

    cp mclen, rate
    brlo enc_padded_block

enc_loop:
    rcall XOR_to_State
    ldi rn, NR_i
    rcall Permutation
    sub mclen, rate
    cp mclen, rate
    brlo enc_padded_block
    rjmp enc_loop

enc_padded_block:
    mov cnt0, mclen ; remaining bytes, possibly zero
    rcall Pad_XOR_to_State
ENC_end:
ret

; Finalization: run Permutation with NR_f rounds, then write
; SQUEEZE_RATE_INBYTES bytes of the state (the tag) through Z.
Finalization:
    ldi rate, SQUEEZE_RATE_INBYTES
    ldi rn, NR_f
    rcall Permutation
    rcall EXTRACT_from_State
ret
; void crypto_aead_encrypt_asm(
;     unsigned char *c,
;     const unsigned char *m,
;     unsigned char mlen,
;     const unsigned char *ad,
;     unsigned char adlen,
;     const unsigned char *npub,
;     const unsigned char *k
;     )
;
; The lengths are declared as unsigned char by the C caller (encrypt.c), so a
; single call handles at most 255 bytes of message and of associated data.
;
; unsigned char *c,          is passed in r24:r25
; const unsigned char *m,    is passed in r22:r23
; unsigned char mlen,        is read from r20 (r21 is stored but never used)
; const unsigned char *ad,   is passed in r18:r19
; unsigned char adlen,       is read from r16 (r17 is stored but never used)
; const unsigned char *npub, is passed in r14:r15
; const unsigned char *k     is passed in r12:r13
;
; Writes mlen ciphertext bytes followed by the tag to c.
.global crypto_aead_encrypt_asm
crypto_aead_encrypt_asm:
    PUSH_ALL
    ; Save all arguments, high byte first, into the consecutive SRAM slots
    ; that start at SRAM_MESSAGE_OUT_ADDR.
    ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
    ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
    st X+, r25 ;store cipher address in SRAM_MESSAGE_OUT_ADDR
    st X+, r24
    st X+, r23 ;store message address in SRAM_MESSAGE_IN_ADDR
    st X+, r22
    st X+, r21 ;store message length in SRAM_MESSAGE_IN_LEN
    st X+, r20
    st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR
    st X+, r18
    st X+, r17 ;store associated data length in SRAM_ADLEN
    st X+, r16
    st X+, r15 ;store nonce address in SRAM_NONCE_ADDR
    st X+, r14
    st X+, r13 ;store key address in SRAM_KEY_ADDR
    st X+, r12
    mov radlen, r16 ; must come first: mclen is r16 itself
    mov mclen, r20

    rcall Initialization

    ldi rn, NR_i
    ldi rate, RATE_INBYTES
    ldi AEDH, 0b000 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
    lds YH, SRAM_ASSOCIATED_DATA_ADDR
    lds YL, SRAM_ASSOCIATED_DATA_ADDR + 1
    rcall AUTH
    rcall AddDomain
    ldi AEDH, 0b001 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
    lds YH, SRAM_MESSAGE_IN_ADDR
    lds YL, SRAM_MESSAGE_IN_ADDR + 1
    lds ZH, SRAM_MESSAGE_OUT_ADDR
    lds ZL, SRAM_MESSAGE_OUT_ADDR + 1
    rcall ENC
    rcall Finalization ; Z now points just past the ciphertext: the tag is appended there
    POP_ALL
ret
; int crypto_aead_decrypt_asm(
;     unsigned char *m,
;     const unsigned char *c,
;     unsigned char clen,
;     const unsigned char *ad,
;     unsigned char adlen,
;     const unsigned char *npub,
;     const unsigned char *k
;     )
;
; The lengths are declared as unsigned char by the C caller (encrypt.c).
; That caller passes the length of the ciphertext body WITHOUT the tag as
; clen; the received tag is read from c + clen.
;
; unsigned char *m,          is passed in r24:r25
; const unsigned char *c,    is passed in r22:r23
; unsigned char clen,        is read from r20 (r21 is stored but never used)
; const unsigned char *ad,   is passed in r18:r19
; unsigned char adlen,       is read from r16 (r17 is stored but never used)
; const unsigned char *npub, is passed in r14:r15
; const unsigned char *k     is passed in r12:r13
;
; Returns 0 in r25:r24 when the tag matches and 0xFFFF when it does not.
; The plaintext is written to m in both cases; the caller discards it on a
; mismatch.
.global crypto_aead_decrypt_asm
crypto_aead_decrypt_asm:
    PUSH_ALL
    ; Save all arguments, high byte first, into the consecutive SRAM slots
    ; that start at SRAM_MESSAGE_OUT_ADDR.
    ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
    ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
    st X+, r25 ;store message address in SRAM_MESSAGE_OUT_ADDR
    st X+, r24
    st X+, r23 ;store cipher address in SRAM_MESSAGE_IN_ADDR
    st X+, r22
    st X+, r21 ;store cipher length in SRAM_MESSAGE_IN_LEN
    st X+, r20
    st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR
    st X+, r18
    st X+, r17 ;store associated data length in SRAM_ADLEN
    st X+, r16
    st X+, r15 ;store nonce address in SRAM_NONCE_ADDR
    st X+, r14
    st X+, r13 ;store key address in SRAM_KEY_ADDR
    st X+, r12
    mov radlen, r16 ; must come first: mclen is r16 itself
    mov mclen, r20

    rcall Initialization

    ldi rn, NR_i
    ldi rate, RATE_INBYTES
    ldi AEDH, 0b000 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
    lds YH, SRAM_ASSOCIATED_DATA_ADDR
    lds YL, SRAM_ASSOCIATED_DATA_ADDR + 1
    rcall AUTH
    rcall AddDomain
    ldi AEDH, 0b011 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C)
    lds YH, SRAM_MESSAGE_IN_ADDR
    lds YL, SRAM_MESSAGE_IN_ADDR + 1
    lds ZH, SRAM_MESSAGE_OUT_ADDR
    lds ZL, SRAM_MESSAGE_OUT_ADDR + 1
    rcall ENC ; leaves Y just past the ciphertext body, i.e. at the received tag

    ; Squeeze the expected tag into SRAM_ADDITIONAL.
    ldi ZH, hi8(SRAM_ADDITIONAL)
    ldi ZL, lo8(SRAM_ADDITIONAL)
    rcall Finalization

    ; Compare the expected tag (Z) with the received tag (Y).
    ; All CRYPTO_ABYTES bytes are always processed and the differences are
    ; OR-ed together, so the running time does not reveal the position of the
    ; first mismatching byte.
    sbiw ZL, CRYPTO_ABYTES ; rewind Z to the start of the expected tag
    ldi cnt0, CRYPTO_ABYTES
    clr r24 ; difference accumulator (`rate` is no longer needed)
compare_tag:
    ld tmp0, Z+
    ld tmp1, Y+
    eor tmp0, tmp1 ; zero iff the two bytes are equal
    or r24, tmp0
    dec cnt0
    brne compare_tag

    tst r24
    breq return_tag_match

return_tag_not_match:
    ldi r25, 0xFF
    ldi r24, 0xFF
    rjmp crypto_aead_decrypt_end

return_tag_match:
    clr r25
    clr r24
crypto_aead_decrypt_end:
    POP_ALL
ret

; #ifdef CRYPTO_AEAD
#endif


#ifdef CRYPTO_HASH

; void crypto_hash_asm(
;     unsigned char *out,
;     const unsigned char *in,
;     unsigned char inlen
;     )
;
; inlen is declared as unsigned char by the C caller (hash.c), so a single
; call hashes at most 255 bytes.
;
; unsigned char *out,      is passed in r24:r25
; const unsigned char *in, is passed in r22:r23
; unsigned char inlen,     is read from r20 (r21 is stored but never used)
;
; Writes CRYPTO_BYTES digest bytes to out.
.global crypto_hash_asm
crypto_hash_asm:
    PUSH_ALL
    ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
    ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
    st X+, r25 ;store digest address in SRAM_MESSAGE_OUT_ADDR
    st X+, r24
    st X+, r23 ;store input address in SRAM_MESSAGE_IN_ADDR
    st X+, r22
    st X+, r21 ;store input length in SRAM_MESSAGE_IN_LEN
    st X+, r20
    mov mclen, r20

    ; Clear the state. In the 384-bit state / 128-bit rate variant the last
    ; byte is set to S384_R192_BITS instead of zero.
    ldi XH, hi8(SRAM_STATE)
    ldi XL, lo8(SRAM_STATE)
#if (STATE_INBITS==384) && (HASH_RATE_INBITS==128)
    ldi cnt0, STATE_INBYTES - 1
#else
    ldi cnt0, STATE_INBYTES
#endif
    clr tmp0
zero_state:
    st X+, tmp0
    dec cnt0
    brne zero_state

#if (STATE_INBITS==384) && (HASH_RATE_INBITS==128)
    ldi tmp0, S384_R192_BITS
    st X+, tmp0
#endif

    ldi rn, NR_h
    ldi AEDH, 0b100 ; hash mode: bits 0 and 1 clear, so absorbing stores no output

HASH_ABSORBING:
    mov radlen, mclen
    tst radlen
    breq EMPTY_M
    ldi rate, HASH_RATE_INBYTES
    lds YH, SRAM_MESSAGE_IN_ADDR
    lds YL, SRAM_MESSAGE_IN_ADDR + 1
    rcall AUTH
    rjmp HASH_SQUEEZING

EMPTY_M:
    ; AUTH does nothing for an empty input, so the padding block is applied here.
    ldi XH, hi8(SRAM_STATE)
    ldi XL, lo8(SRAM_STATE)
    ldi tmp0, PAD_BITS
    ld tmp1, X
    eor tmp1, tmp0
    st X, tmp1
    rcall Permutation

HASH_SQUEEZING:
    ldi rate, HASH_SQUEEZE_RATE_INBYTES
    lds ZH, SRAM_MESSAGE_OUT_ADDR
    lds ZL, SRAM_MESSAGE_OUT_ADDR + 1
    ldi tcnt, CRYPTO_BYTES ; digest bytes still to be produced
SQUEEZING_loop:
    rcall EXTRACT_from_State
    subi tcnt, HASH_SQUEEZE_RATE_INBYTES
    breq HASH_SQUEEZING_end
    rcall Permutation
    rjmp SQUEEZING_loop
HASH_SQUEEZING_end:
    POP_ALL
ret

#endif
2] AEAD_State[ 2] | Message[ 2] Perm_row_0[2] 0 Tag[ 2] +; N[ 3] AEAD_State[ 3] | Message[ 3] Perm_row_0[3] 0 Tag[ 3] +; N[ 4] AEAD_State[ 4] | Message[ 4] 0x01 Perm_row_0[4] 0 Tag[ 4] +; N[ 5] AEAD_State[ 5] | Message[ 5] 0x00 Perm_row_0[5] 0 Tag[ 5] +; N[ 6] AEAD_State[ 6] | Message[ 6] 0x00 Perm_row_0[6] 0 Tag[ 6] +; N[ 7] AEAD_State[ 7] | Message[ 7] 0x00 Perm_row_0[7] <<< 0 Tag[ 7] +; N[ 8] AEAD_State[ 8] | Perm_row_1[0] 1 +; N[ 9] AEAD_State[ 9] | Perm_row_1[1] 1 +; N[10] AEAD_State[10] | Perm_row_1[2] 1 +; N[11] AEAD_State[11] | Perm_row_1[3] 1 +; N[12] AEAD_State[12] | Perm_row_1[4] 1 +; N[13] AEAD_State[13] | Perm_row_1[5] 1 +; N[14] AEAD_State[14] | Perm_row_1[6] 1 +; N[15] AEAD_State[15] | Perm_row_1[7] <<< 1 +; K[ 0] AEAD_State[16] | Perm_row_2[0] 8 +; K[ 1] AEAD_State[17] | Perm_row_2[1] 8 +; K[ 2] AEAD_State[18] | Perm_row_2[2] 8 +; K[ 3] AEAD_State[19] | Perm_row_2[3] 8 +; K[ 4] AEAD_State[20] | Perm_row_2[4] 8 +; K[ 5] AEAD_State[21] | Perm_row_2[5] 8 +; K[ 6] AEAD_State[22] | Perm_row_2[6] 8 +; K[ 7] AEAD_State[23] | Perm_row_2[7] <<< 8 +; K[ 8] AEAD_State[24] | Perm_row_3[0] 25 +; K[ 9] AEAD_State[25] | Perm_row_3[1] 25 +; K[10] AEAD_State[26] | Perm_row_3[2] 25 +; K[11] AEAD_State[27] | Perm_row_3[3] 25 +; K[12] AEAD_State[28] | Perm_row_3[4] 25 +; K[13] AEAD_State[29] | Perm_row_3[5] 25 +; K[14] AEAD_State[30] | Perm_row_3[6] 25 +; K[15] AEAD_State[31] | ^0x80 Perm_row_3[7] <<< 25 +; +; +; KNOT-AEAD(128, 384, 192): +; Initalization +; N[ 0] AEAD_State[ 0] | Message[ 0] Perm_row_0[ 0] 0 Tag[ 0] +; N[ 1] AEAD_State[ 1] | Message[ 1] Perm_row_0[ 1] 0 Tag[ 1] +; N[ 2] AEAD_State[ 2] | Message[ 2] Perm_row_0[ 2] 0 Tag[ 2] +; N[ 3] AEAD_State[ 3] | Message[ 3] Perm_row_0[ 3] 0 Tag[ 3] +; N[ 4] AEAD_State[ 4] | Message[ 4] 0x01 Perm_row_0[ 4] 0 Tag[ 4] +; N[ 5] AEAD_State[ 5] | Message[ 5] 0x00 Perm_row_0[ 5] 0 Tag[ 5] +; N[ 6] AEAD_State[ 6] | Message[ 6] 0x00 Perm_row_0[ 6] 0 Tag[ 6] +; N[ 7] AEAD_State[ 7] | Message[ 7] 0x00 Perm_row_0[ 7] 0 Tag[ 7] 
+; N[ 8] AEAD_State[ 8] | Message[ 8] 0x00 Perm_row_0[ 8] 0 Tag[ 8] +; N[ 9] AEAD_State[ 9] | Message[ 9] 0x00 Perm_row_0[ 9] 0 Tag[ 9] +; N[10] AEAD_State[10] | Message[10] 0x00 Perm_row_0[10] 0 Tag[10] +; N[11] AEAD_State[11] | Message[11] 0x00 Perm_row_0[11] <<< 0 Tag[11] +; N[12] AEAD_State[12] | Message[12] 0x00 Perm_row_1[ 0] 1 Tag[12] +; N[13] AEAD_State[13] | Message[13] 0x00 Perm_row_1[ 1] 1 Tag[13] +; N[14] AEAD_State[14] | Message[14] 0x00 Perm_row_1[ 2] 1 Tag[14] +; N[15] AEAD_State[15] | Message[15] 0x00 Perm_row_1[ 3] 1 Tag[15] +; K[ 0] AEAD_State[16] | Message[16] 0x00 Perm_row_1[ 4] 1 +; K[ 1] AEAD_State[17] | Message[17] 0x00 Perm_row_1[ 5] 1 +; K[ 2] AEAD_State[18] | Message[18] 0x00 Perm_row_1[ 6] 1 +; K[ 3] AEAD_State[19] | Message[19] 0x00 Perm_row_1[ 7] 1 +; K[ 4] AEAD_State[20] | Message[20] 0x00 Perm_row_1[ 8] 1 +; K[ 5] AEAD_State[21] | Message[21] 0x00 Perm_row_1[ 9] 1 +; K[ 6] AEAD_State[22] | Message[22] 0x00 Perm_row_1[10] 1 +; K[ 7] AEAD_State[23] | Message[23] 0x00 Perm_row_1[11] <<< 1 +; K[ 8] AEAD_State[24] | Perm_row_2[ 0] 8 +; K[ 9] AEAD_State[25] | Perm_row_2[ 1] 8 +; K[10] AEAD_State[26] | Perm_row_2[ 2] 8 +; K[11] AEAD_State[27] | Perm_row_2[ 3] 8 +; K[12] AEAD_State[28] | Perm_row_2[ 4] 8 +; K[13] AEAD_State[29] | Perm_row_2[ 5] 8 +; K[14] AEAD_State[30] | Perm_row_2[ 6] 8 +; K[15] AEAD_State[31] | Perm_row_2[ 7] 8 +; 0x00 AEAD_State[32] | Perm_row_2[ 8] 8 +; 0x00 AEAD_State[33] | Perm_row_2[ 9] 8 +; 0x00 AEAD_State[34] | Perm_row_2[10] 8 +; 0x00 AEAD_State[35] | Perm_row_2[11] <<< 8 +; 0x00 AEAD_State[36] | Perm_row_3[ 0] 55 +; 0x00 AEAD_State[37] | Perm_row_3[ 1] 55 +; 0x00 AEAD_State[38] | Perm_row_3[ 2] 55 +; 0x00 AEAD_State[39] | Perm_row_3[ 3] 55 +; 0x00 AEAD_State[40] | Perm_row_3[ 4] 55 +; 0x00 AEAD_State[41] | Perm_row_3[ 5] 55 +; 0x00 AEAD_State[42] | Perm_row_3[ 6] 55 +; 0x00 AEAD_State[43] | Perm_row_3[ 7] 55 +; 0x00 AEAD_State[44] | Perm_row_3[ 8] 55 +; 0x00 AEAD_State[45] | Perm_row_3[ 9] 55 +; 0x00 
/*
 * Assembly hash core (encrypt_core.S). The length is a single byte, so one
 * call can hash at most 255 bytes.
 */
extern void crypto_hash_asm(
    unsigned char *out,
    const unsigned char *in,
    unsigned char inlen
    );

/*
 * Hash in[0..inlen-1] into out[0..CRYPTO_BYTES-1].
 *
 * Returns 0 on success, or -1 when inlen does not fit the one-byte length of
 * the assembly core. Previously such lengths were silently truncated to their
 * low byte and a digest of the truncated input was returned as if valid.
 */
int crypto_hash(
    unsigned char *out,
    const unsigned char *in,
    unsigned long long inlen
)
{
    if (inlen > 0xFFu) {
        return -1;
    }

    crypto_hash_asm(out, in, (unsigned char)inlen);

    return 0;
}
load_column4 + rjmp load_column5 + rjmp load_column6 + rjmp load_column7 + rjmp amend_shiftRow + +load_column0: + mov x3j, x30 + rjmp Sbox_one_column +load_column1: + mov x30, x3j + mov x3j, x31 + rjmp Sbox_one_column +load_column2: + mov x31, x3j + mov x3j, x32 + rjmp Sbox_one_column +load_column3: + mov x32, x3j + mov x3j, x33 + rjmp Sbox_one_column +load_column4: + mov x33, x3j + mov x3j, x34 + rjmp Sbox_one_column +load_column5: + mov x34, x3j + mov x3j, x35 + rjmp Sbox_one_column +load_column6: + mov x35, x3j + mov x3j, x36 + rjmp Sbox_one_column +load_column7: + mov x36, x3j + mov x3j, x37 + rjmp Sbox_one_column + +#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) +LFSR_table: + rjmp LFSR6 + rjmp LFSR7 +LFSR6: + LFSR6_MACRO + rjmp LFSR_DONE +LFSR7: + LFSR7_MACRO + rjmp LFSR_DONE +#endif + +;;;;;;;;;;;;;;;;;;;;;;;; Real Start +AddRC_SubColumns_Start: + ldi YH, hi8(SRAM_STATE) + ldi YL, lo8(SRAM_STATE) + clr ccnt + ld x0j, Y + eor x0j, rc + +#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) + ldi ZL, pm_lo8(LFSR_table) + ldi ZH, pm_hi8(LFSR_table) + sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[1] = 1 for HASH + adiw ZL, 1 + ijmp +LFSR_DONE: +#elif defined(CRYPTO_AEAD) + LFSR6_MACRO ; only AEAD +#else + LFSR7_MACRO ; only HASH +#endif + + ldd x1j, Y + ROW_INBYTES + ldd x2j, Y + 2 * ROW_INBYTES + ldi ZL, pm_lo8(load_columns_table) + ldi ZH, pm_hi8(load_columns_table) + ijmp +Sbox_one_column: + Sbox x0j, x1j, x2j, x3j + + ; 7 6 5 4 3 2 1 0 + ; -- -- -- -- -- -- -- x- 0 + ; -- -- -- -- -- -- -- x' 0 + ; -- -- -- -- -- -- x- -- 1 + ; -- -- -- -- x' -- -- -- 3 + ; 4 3 2 1 0 7 6 5 + ; Store a byte to Row 0 + st Y, x0j + ; Store a byte combined with ShiftRow1 + lsl t1j + mov t1j, x1j ; back up the last updated byte in t1j, to be used in shiftRow1 (1 bit left) + rol x1j + std Y + ROW_INBYTES, x1j + ; Store a byte combined with ShiftRow2 + inc ccnt + cpi ccnt, ROW_INBYTES + breq ROW2_WRAP + ldd t2j, Y + 2 * ROW_INBYTES + 1 ; load next byte, the last updated byte needed to 
#include "assist.h"

; Permutation (384-bit state, four rows of ROW_INBYTES = 12 bytes).
;
; Runs `rn` rounds over SRAM_STATE. Rows 0..2 are processed in SRAM one byte
; column at a time; row 3 is held in x30..x3b for the whole call and written
; back at the end. The round constant starts at 0x01 and is stepped with
; LFSR7_MACRO each round.
;
; Registers listed in PUSH_CONFLICT are saved and restored; other scratch
; registers used here (e.g. tmp0, t1j, t2j) are not preserved.
Permutation:
    PUSH_CONFLICT
    mov rcnt, rn

    ldi rc, 0x01
    ; load row 3 into registers
    ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES)
    ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES)
    ld x30, Y+
    ld x31, Y+
    ld x32, Y+
    ld x33, Y+
    ld x34, Y+
    ld x35, Y+
    ld x36, Y+
    ld x37, Y+
    ld x38, Y+
    ld x39, Y+
    ld x3a, Y+
    ld x3b, Y+

round_loop_start:
    rjmp AddRC_SubColumns_Start

; Jump table indexed through Z: one single-word rjmp per byte column, then the
; end-of-round fix-up. Z is advanced by one word after every column, so every
; entry must stay a one-word instruction.
load_columns_table:
    rjmp load_column0
    rjmp load_column1
    rjmp load_column2
    rjmp load_column3
    rjmp load_column4
    rjmp load_column5
    rjmp load_column6
    rjmp load_column7
    rjmp load_column8
    rjmp load_column9
    rjmp load_columna
    rjmp load_columnb
    rjmp amend_shiftRow

; Each entry first stores the row-3 byte produced for the previous column,
; then fetches the row-3 byte of its own column into x3j.
load_column0:
    mov x3j, x30
    rjmp Sbox_one_column
load_column1:
    mov x30, x3j
    mov x3j, x31
    rjmp Sbox_one_column
load_column2:
    mov x31, x3j
    mov x3j, x32
    rjmp Sbox_one_column
load_column3:
    mov x32, x3j
    mov x3j, x33
    rjmp Sbox_one_column
load_column4:
    mov x33, x3j
    mov x3j, x34
    rjmp Sbox_one_column
load_column5:
    mov x34, x3j
    mov x3j, x35
    rjmp Sbox_one_column
load_column6:
    mov x35, x3j
    mov x3j, x36
    rjmp Sbox_one_column
load_column7:
    mov x36, x3j
    mov x3j, x37
    rjmp Sbox_one_column
load_column8:
    mov x37, x3j
    mov x3j, x38
    rjmp Sbox_one_column
load_column9:
    mov x38, x3j
    mov x3j, x39
    rjmp Sbox_one_column
load_columna:
    mov x39, x3j
    mov x3j, x3a
    rjmp Sbox_one_column
load_columnb:
    mov x3a, x3j
    mov x3j, x3b
    rjmp Sbox_one_column

;;;;;;;;;;;;;;;;;;;;;;;; Real Start
AddRC_SubColumns_Start:
    ldi YH, hi8(SRAM_STATE)
    ldi YL, lo8(SRAM_STATE)
    ldi ZL, pm_lo8(load_columns_table)
    ldi ZH, pm_hi8(load_columns_table)
    clr ccnt
    ld x0j, Y
    eor x0j, rc             ; add the round constant to byte 0 of row 0
    LFSR7_MACRO             ; step rc for the next round

    ldd x1j, Y + ROW_INBYTES
    ldd x2j, Y + 2 * ROW_INBYTES
    ijmp
Sbox_one_column:
    Sbox x0j, x1j, x2j, x3j

    ; Row 0 is stored unchanged. Row 1 is rotated left by 1 bit and row 2 by
    ; 8 bits (one byte) while being stored. Row 3 is rotated by 55 bits in
    ; 'amend_shiftRow' once all columns are done.
    ; Store a byte to Row 0
    st Y, x0j
    ; Store a byte combined with ShiftRow 1
    lsl t1j                 ; carry <- top bit of the previous column's row-1 byte
    mov t1j, x1j            ; back up the last updated byte in t1j, to be used in shiftRow1 (1 bit left)
    rol x1j
    std Y + ROW_INBYTES, x1j
    ; Store a byte combined with ShiftRow 2
    inc ccnt
    cpi ccnt, ROW_INBYTES
    breq ROW2_WRAP
    ldd t2j, Y + 2 * ROW_INBYTES + 1    ; load the next row-2 byte before its slot is overwritten
    std Y + 2 * ROW_INBYTES + 1, x2j    ; the updated byte moves one byte position up
    mov x2j, t2j
    rjmp NO_ROW2_WRAP       ; rjmp: one word, as used by every other branch in this routine
ROW2_WRAP:
    std Y + ROW_INBYTES + 1, x2j        ; last column wraps around to byte 0 of row 2
    ; remain ShiftRow3 to be done at 'amend_shiftRow'
NO_ROW2_WRAP:
    adiw YL, 1
    ld x0j, Y
    ldd x1j, Y + ROW_INBYTES

    adiw ZL, 1              ; next jump-table entry
    ijmp

amend_shiftRow:
    ; Row 1: byte 0 was rotated with a stale carry; insert the top bit of the
    ; last column's row-1 byte into its bit 0.
    ldi YH, hi8(SRAM_STATE + ROW_INBYTES)
    ldi YL, lo8(SRAM_STATE + ROW_INBYTES)

    ld x1j, Y
    bst t1j, 7
    bld x1j, 0
    st Y, x1j

    ; Row 3: rotate left by 55 bits = rotate right by 1 bit, then left by 56 bits.
    ; >>> 1
    mov x3b, x3j            ; store the row-3 byte of the last column
    ror x3j                 ; carry <- bit 0 of byte b
    ror x3a
    ror x39
    ror x38
    ror x37
    ror x36
    ror x35
    ror x34
    ror x33
    ror x32
    ror x31
    ror x30
    ror x3b                 ; bit 0 of byte 0 wraps into bit 7 of byte b
    ; <<< 56 (seven byte positions)
    ; b a 9 8 7 6 5 4 3 2 1 0 => 4 3 2 1 0 b a 9 8 7 6 5
    mov x3j, x30
    mov x30, x35
    mov x35, x3a
    mov x3a, x33
    mov x33, x38
    mov x38, x31
    mov x31, x36
    mov x36, x3b
    mov x3b, x34
    mov x34, x39
    mov x39, x32
    mov x32, x37
    mov x37, x3j

    dec rcnt
    breq round_loop_end
    rjmp round_loop_start

round_loop_end:

    ; write row 3 back to the state
    ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES)
    ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES)
    st Y+, x30
    st Y+, x31
    st Y+, x32
    st Y+, x33
    st Y+, x34
    st Y+, x35
    st Y+, x36
    st Y+, x37
    st Y+, x38
    st Y+, x39
    st Y+, x3a
    st Y+, x3b

    POP_CONFLICT
ret
8-bit AVR CPU * +; * Version 1.0 2020 by KNOT Team * +; ********************************************** +; +#include "assist.h" + +Permutation: + PUSH_CONFLICT + mov rcnt, rn + + ldi rc, 0x01 + ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES) + ld x30, Y+ + ld x31, Y+ + ld x32, Y+ + ld x33, Y+ + ld x34, Y+ + ld x35, Y+ + ld x36, Y+ + ld x37, Y+ + ld x38, Y+ + ld x39, Y+ + ld x3a, Y+ + ld x3b, Y+ + ld x3c, Y+ + ld x3d, Y+ + ld x3e, Y+ + ld x3f, Y+ + +round_loop_start: + rjmp AddRC_SubColumns_Start + +load_columns_table: + rjmp load_column0 + rjmp load_column1 + rjmp load_column2 + rjmp load_column3 + rjmp load_column4 + rjmp load_column5 + rjmp load_column6 + rjmp load_column7 + rjmp load_column8 + rjmp load_column9 + rjmp load_columna + rjmp load_columnb + rjmp load_columnc + rjmp load_columnd + rjmp load_columne + rjmp load_columnf + rjmp amend_shiftRow + +load_column0: + mov x3j, x30 + rjmp Sbox_one_column +load_column1: + mov x30, x3j + mov x3j, x31 + rjmp Sbox_one_column +load_column2: + mov x31, x3j + mov x3j, x32 + rjmp Sbox_one_column +load_column3: + mov x32, x3j + mov x3j, x33 + rjmp Sbox_one_column +load_column4: + mov x33, x3j + mov x3j, x34 + rjmp Sbox_one_column +load_column5: + mov x34, x3j + mov x3j, x35 + rjmp Sbox_one_column +load_column6: + mov x35, x3j + mov x3j, x36 + rjmp Sbox_one_column +load_column7: + mov x36, x3j + mov x3j, x37 + rjmp Sbox_one_column +load_column8: + mov x37, x3j + mov x3j, x38 + rjmp Sbox_one_column +load_column9: + mov x38, x3j + mov x3j, x39 + rjmp Sbox_one_column +load_columna: + mov x39, x3j + mov x3j, x3a + rjmp Sbox_one_column +load_columnb: + mov x3a, x3j + mov x3j, x3b + rjmp Sbox_one_column +load_columnc: + mov x3b, x3j + mov x3j, x3c + rjmp Sbox_one_column +load_columnd: + mov x3c, x3j + mov x3j, x3d + rjmp Sbox_one_column +load_columne: + mov x3d, x3j + mov x3j, x3e + rjmp Sbox_one_column +load_columnf: + mov x3e, x3j + mov x3j, x3f + rjmp Sbox_one_column + +#if 
defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) +LFSR_table: + rjmp LFSR7 + rjmp LFSR8 +LFSR7: + LFSR7_MACRO + rjmp LFSR_DONE +LFSR8: + LFSR8_MACRO + rjmp LFSR_DONE +#endif + +;;;;;;;;;;;;;;;;;;;;;;;; Real Start +AddRC_SubColumns_Start: + ldi YH, hi8(SRAM_STATE) + ldi YL, lo8(SRAM_STATE) + clr ccnt + ld x0j, Y + eor x0j, rc + +#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) + ldi ZL, pm_lo8(LFSR_table) + ldi ZH, pm_hi8(LFSR_table) + sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[1] = 1 for HASH + adiw ZL, 1 + ijmp +LFSR_DONE: +#elif defined(CRYPTO_AEAD) + LFSR7_MACRO ; only AEAD +#else + LFSR8_MACRO ; only HASH +#endif + + ldd x1j, Y + ROW_INBYTES + ldd x2j, Y + 2 * ROW_INBYTES + ldd t2j, Y + 2 * ROW_INBYTES + 1 + ldi ZL, pm_lo8(load_columns_table) + ldi ZH, pm_hi8(load_columns_table) + ijmp +Sbox_one_column: + Sbox x0j, x1j, x2j, x3j + + ; f e d c b a 9 8 7 6 5 4 3 2 1 0 + ; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x- 0 + ; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x' 0 + ; -- -- -- -- -- -- -- -- -- -- -- -- -- x- -- -- 2 + ; -- -- -- -- -- -- -- -- -- -- -- -- x' -- -- -- 3 + ; c b a 9 8 7 6 5 4 3 2 1 0 f e d + ; Store a byte to Row 0 + st Y, x0j + ; Store a byte combined with ShiftRow1 + lsl t1j + mov t1j, x1j ; back up the last updated byte in t1j, to be used in shiftRow1 (1 bit left) + rol x1j + std Y + ROW_INBYTES, x1j + ; Store a byte combined with ShiftRow2 + inc ccnt + cpi ccnt, ROW_INBYTES - 1 + brsh ROW2_WRAP + ldd tmp0, Y + 2 * ROW_INBYTES + 2 ; load next byte, the last updated byte needed to be shifted to the address of the next bytes + std Y + 2 * ROW_INBYTES + 2, x2j + mov x2j, t2j + mov t2j, tmp0 + jmp NO_ROW2_WRAP +ROW2_WRAP: + std Y + ROW_INBYTES + 2, x2j + mov x2j, t2j + + ; remain ShiftRow3 to be done at 'amend_shiftRow' +NO_ROW2_WRAP: + adiw YL, 1 + ld x0j, Y + ldd x1j, Y + ROW_INBYTES + + adiw ZL, 1 + ijmp + +amend_shiftRow: + ldi YH, hi8(SRAM_STATE + ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + ROW_INBYTES) + + ld x1j, Y + bst t1j, 7 + bld 
x1j, 0 + st Y, x1j + + ; <<< 1 + mov x3f, x3j + rol x3j + rol x30 + rol x31 + rol x32 + rol x33 + rol x34 + rol x35 + rol x36 + rol x37 + rol x38 + rol x39 + rol x3a + rol x3b + rol x3c + rol x3d + rol x3e + rol x3f + ; <<< 24 + ; f e d c b a 9 8 7 6 5 4 3 2 1 0 => + ; c b a 9 8 7 6 5 4 3 2 1 0 f e d + mov x3j, x30 + mov x30, x3d + mov x3d, x3a + mov x3a, x37 + mov x37, x34 + mov x34, x31 + mov x31, x3e + mov x3e, x3b + mov x3b, x38 + mov x38, x35 + mov x35, x32 + mov x32, x3f + mov x3f, x3c + mov x3c, x39 + mov x39, x36 + mov x36, x33 + mov x33, x3j + + dec rcnt + breq round_loop_end + rjmp round_loop_start + +round_loop_end: + + ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES) + st Y+, x30 + st Y+, x31 + st Y+, x32 + st Y+, x33 + st Y+, x34 + st Y+, x35 + st Y+, x36 + st Y+, x37 + st Y+, x38 + st Y+, x39 + st Y+, x3a + st Y+, x3b + st Y+, x3c + st Y+, x3d + st Y+, x3e + st Y+, x3f + + POP_CONFLICT +ret \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot384/avr8_lowrom/permutation.h b/knot/Implementations/crypto_hash/knot384/avr8_lowrom/permutation.h new file mode 100644 index 0000000..a57c5d3 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot384/avr8_lowrom/permutation.h @@ -0,0 +1,109 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.0 2020 by KNOT Team * +; ********************************************** +; + +; +; ============================================ +; R E G I S T E R D E F I N I T I O N S +; ============================================ +; + +#define mclen r16 +#define radlen r17 +#define tcnt r17 +#define tmp0 r20 +#define tmp1 r21 +#define cnt0 r22 +#define rn r23 +#define rate r24 + +; +; ; AEDH = 0b000: for authenticate AD +; ; AEDH = 0b001: for encryption +; ; AEDH = 0b011: for decryption 
+; ; AEDH = 0b100: for hash +; #define AEDH r25 ; Register used globally within this program +; +; #define x30 r0 ; Register used without overlapping +; #define x31 r1 ; Register used without overlapping +; #define x32 r2 ; Register used without overlapping +; #define x33 r3 ; Register used without overlapping +; #define x34 r4 ; Register used without overlapping +; #define x35 r5 ; Register used without overlapping +; #define x36 r6 ; Register used without overlapping +; #define x37 r7 ; Register used without overlapping +; #define x38 r8 ; Register used without overlapping +; #define x39 r9 ; Register used without overlapping +; #define x3a r10 ; Register used without overlapping +; #define x3b r11 ; Register used without overlapping +; #define x3c r12 ; Register used without overlapping +; #define x3d r13 ; Register used without overlapping +; #define x3e r14 ; Register used without overlapping +; #define x3f r15 ; Register used without overlapping +; +; #define x0j r16 ; Register used overlapped, should be backed up before using +; #define x1j r17 ; Register used overlapped, should be backed up before using +; #define x2j r18 ; Register used overlapped, should be backed up before using +; #define x3j r19 ; Register used overlapped, should be backed up before using +; +; ; t2j used in knot512 to keep one byte in Row2 (because of rotating 16-bit), +; ; will not be interrupted by LFSR which uses the overlapped register tmp1 +; #define t2j r21 ; Temporary register, used freely +; #define t1j r22 ; Temporary register, used freely +; #define t3j r23 ; Temporary register, used freely +; +; #define rc r24 ; Register used overlapped, should be backed up before using +; #define rcnt r26 ; Register used overlapped, should be backed up before using +; #define ccnt r27 ; Register used overlapped, should be backed up before using + +#define AEDH r25 +#define x30 r0 +#define x31 r1 +#define x32 r2 +#define x33 r3 +#define x34 r4 +#define x35 r5 +#define x36 r6 +#define x37 r7 
+#define x38 r8 +#define x39 r9 +#define x3a r10 +#define x3b r11 +#define x3c r12 +#define x3d r13 +#define x3e r14 +#define x3f r15 + +#define x0j r16 +#define x1j r17 +#define x2j r18 +#define x3j r19 + +; t2j used in knot512 to keep one byte in Row2 (because of rotating 16-bit), +; will not be interrupted by LFSR which uses the overlapped register tmp1 +#define t2j r21 +#define t1j r22 +#define t3j r23 + +#define rc r24 +#define rcnt r26 +#define ccnt r27 + +#if (STATE_INBITS==256) +#include "knot256.h" +#elif (STATE_INBITS==384) +#include "knot384.h" +#elif (STATE_INBITS==512) +#include "knot512.h" +#else +#error "Not specified key size and state size" +#endif + + diff --git a/knot/Implementations/crypto_hash/knot512/avr8_lowrom/api.h b/knot/Implementations/crypto_hash/knot512/avr8_lowrom/api.h new file mode 100644 index 0000000..a46499d --- /dev/null +++ b/knot/Implementations/crypto_hash/knot512/avr8_lowrom/api.h @@ -0,0 +1 @@ +#define CRYPTO_BYTES 64 \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot512/avr8_lowrom/assist.h b/knot/Implementations/crypto_hash/knot512/avr8_lowrom/assist.h new file mode 100644 index 0000000..cb903a5 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot512/avr8_lowrom/assist.h @@ -0,0 +1,140 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.0 2020 by KNOT Team * +; ********************************************** +; +.macro LFSR6_MACRO + bst rc, 5 + bld tmp0, 0 + bst rc, 4 + bld tmp1, 0 + eor tmp0, tmp1 + ror tmp0 + rol rc + andi rc, 0x3F +.endm + +.macro LFSR7_MACRO + bst rc, 6 + bld tmp0, 0 + bst rc, 5 + bld tmp1, 0 + eor tmp0, tmp1 + ror tmp0 + rol rc + andi rc, 0x7F +.endm + +.macro LFSR8_MACRO + bst rc, 7 + bld tmp0, 0 + bst rc, 5 + bld tmp1, 0 + eor tmp0, tmp1 + bst rc, 4 + bld tmp1, 0 + eor tmp0, tmp1 + 
bst rc, 3 + bld tmp1, 0 + eor tmp0, tmp1 + ror tmp0 + rol rc +.endm + +.macro Sbox i0, i1, i2, i3 + mov tmp0, \i1 + com \i0 + and \i1, \i0 + eor \i1, \i2 + or \i2, tmp0 + eor \i0, \i3 + eor \i2, \i0 + eor tmp0, \i3 + and \i0, \i1 + eor \i3, \i1 + eor \i0, tmp0 + and tmp0, \i2 + eor \i1, tmp0 +.endm + +.macro PUSH_CONFLICT + push r16 + push r17 + push r18 + push r19 + + push r23 + push r24 + + push r26 + push r27 + push r28 + push r29 + push r30 + push r31 +.endm + +.macro POP_CONFLICT + pop r31 + pop r30 + pop r29 + pop r28 + pop r27 + pop r26 + + pop r24 + pop r23 + + pop r19 + pop r18 + pop r17 + pop r16 +.endm + +.macro PUSH_ALL + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r28 + push r29 +.endm + +.macro POP_ALL + pop r29 + pop r28 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + clr r1 +.endm \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot512/avr8_lowrom/config.h b/knot/Implementations/crypto_hash/knot512/avr8_lowrom/config.h new file mode 100644 index 0000000..70fa8d6 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot512/avr8_lowrom/config.h @@ -0,0 +1,131 @@ +#ifndef __CONFIG_H__ +#define __CONFIG_H__ + +//#define CRYPTO_AEAD +#define CRYPTO_HASH + +#define MAX_MESSAGE_LENGTH 128 + +#define STATE_INBITS 512 +/* For CRYPTO_AEAD */ +#define CRYPTO_KEYBITS 256 +/* For CRYPTO_HASH */ +#define CRYPTO_BITS 512 + +#define STATE_INBYTES ((STATE_INBITS + 7) / 8) +#define ROW_INBITS ((STATE_INBITS + 3) / 4) +#define ROW_INBYTES ((ROW_INBITS + 7) / 8) + +/* For CRYPTO_AEAD */ +#define CRYPTO_KEYBYTES ((CRYPTO_KEYBITS + 7) / 8) +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES CRYPTO_KEYBYTES +#define CRYPTO_ABYTES CRYPTO_KEYBYTES +#define CRYPTO_NOOVERLAP 1 + +#define MAX_ASSOCIATED_DATA_LENGTH 32 
+#define MAX_CIPHER_LENGTH (MAX_MESSAGE_LENGTH + CRYPTO_ABYTES) + +#define TAG_MATCH 0 +#define TAG_UNMATCH -1 +#define OTHER_FAILURES -2 + +/* For CRYPTO_HASH */ +#define CRYPTO_BYTES ((CRYPTO_BITS + 7) / 8) + + + +#define DOMAIN_BITS 0x80 +#define PAD_BITS 0x01 +#define S384_R192_BITS 0x80 + +#if (STATE_INBITS==256) +#define C1 1 +#define C2 8 +#define C3 25 +#elif (STATE_INBITS==384) +#define C1 1 +#define C2 8 +#define C3 55 +#elif (STATE_INBITS==512) +#define C1 1 +#define C2 16 +#define C3 25 +#else +#error "Not specified state size" +#endif + +#ifdef CRYPTO_AEAD +/* For CRYPTO_AEAD */ +#define KEY_INBITS (CRYPTO_KEYBYTES * 8) +#define KEY_INBYTES (CRYPTO_KEYBYTES) + +#define NONCE_INBITS (CRYPTO_NPUBBYTES * 8) +#define NONCE_INBYTES (CRYPTO_NPUBBYTES) + +#define TAG_INBITS (CRYPTO_ABYTES * 8) +#define TAG_INBYTES (CRYPTO_ABYTES) + +#if (KEY_INBITS==128) && (STATE_INBITS==256) +#define RATE_INBITS 64 +#define NR_0 52 +#define NR_i 28 +#define NR_f 32 +#elif (KEY_INBITS==128) && (STATE_INBITS==384) +#define RATE_INBITS 192 +#define NR_0 76 +#define NR_i 28 +#define NR_f 32 +#elif (KEY_INBITS==192) && (STATE_INBITS==384) +#define RATE_INBITS 96 +#define NR_0 76 +#define NR_i 40 +#define NR_f 44 +#elif (KEY_INBITS==256) && (STATE_INBITS==512) +#define RATE_INBITS 128 +#define NR_0 100 +#define NR_i 52 +#define NR_f 56 +#else +#error "Not specified key size and state size" +#endif + +#define RATE_INBYTES ((RATE_INBITS + 7) / 8) +#define SQUEEZE_RATE_INBYTES TAG_INBYTES + +#endif + +#ifdef CRYPTO_HASH +/* For CRYPTO_HASH */ +#define HASH_DIGEST_INBITS (CRYPTO_BYTES * 8) + +#if (HASH_DIGEST_INBITS==256) && (STATE_INBITS==256) +#define HASH_RATE_INBITS 32 +#define HASH_SQUEEZE_RATE_INBITS 128 +#define NR_h 68 +#elif (HASH_DIGEST_INBITS==256) && (STATE_INBITS==384) +#define HASH_RATE_INBITS 128 +#define HASH_SQUEEZE_RATE_INBITS 128 +#define NR_h 80 +#elif (HASH_DIGEST_INBITS==384) && (STATE_INBITS==384) +#define HASH_RATE_INBITS 48 +#define HASH_SQUEEZE_RATE_INBITS 
192 +#define NR_h 104 +#elif (HASH_DIGEST_INBITS==512) && (STATE_INBITS==512) +#define HASH_RATE_INBITS 64 +#define HASH_SQUEEZE_RATE_INBITS 256 +#define NR_h 140 +#else +#error "Not specified hash digest size and state size" +#endif + +#define HASH_RATE_INBYTES ((HASH_RATE_INBITS + 7) / 8) +#define HASH_SQUEEZE_RATE_INBYTES ((HASH_SQUEEZE_RATE_INBITS + 7) / 8) + +#endif + +#define TAG_MATCH 0 +#define TAG_UNMATCH -1 +#define OTHER_FAILURES -2 + +#endif \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot512/avr8_lowrom/crypto_hash.h b/knot/Implementations/crypto_hash/knot512/avr8_lowrom/crypto_hash.h new file mode 100644 index 0000000..342a639 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot512/avr8_lowrom/crypto_hash.h @@ -0,0 +1,13 @@ +#ifdef __cplusplus +extern "C" { +#endif + +int crypto_hash( + unsigned char *out, + const unsigned char *in, + unsigned long long inlen + ); + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot512/avr8_lowrom/encrypt.c b/knot/Implementations/crypto_hash/knot512/avr8_lowrom/encrypt.c new file mode 100644 index 0000000..baf0a3b --- /dev/null +++ b/knot/Implementations/crypto_hash/knot512/avr8_lowrom/encrypt.c @@ -0,0 +1,106 @@ +#include +#include +#include +#include +#include "config.h" + +extern void crypto_aead_encrypt_asm( + unsigned char *c, + const unsigned char *m, + unsigned char mlen, + const unsigned char *ad, + unsigned char adlen, + const unsigned char *npub, + const unsigned char *k + ); + +extern int crypto_aead_decrypt_asm( + unsigned char *m, + const unsigned char *c, + unsigned char clen, + const unsigned char *ad, + unsigned char adlen, + const unsigned char *npub, + const unsigned char *k + ); + +extern void crypto_hash_asm( + unsigned char *out, + const unsigned char *in, + unsigned char inlen + ); + + +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long 
mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k + ) +{ + /* + ... + ... the code for the cipher implementation goes here, + ... generating a ciphertext c[0],c[1],...,c[*clen-1] + ... from a plaintext m[0],m[1],...,m[mlen-1] + ... and associated data ad[0],ad[1],...,ad[adlen-1] + ... and nonce npub[0],npub[1],.. + ... and secret key k[0],k[1],... + ... the implementation shall not use nsec + ... + ... return 0; + */ + + (void)nsec; + /* NOTE(review): the crypto_aead_encrypt_asm prototype takes mlen and adlen as unsigned char, so only their low 8 bits reach the assembly routine, while *clen below is computed from the full mlen. */ + crypto_aead_encrypt_asm(c, m, mlen, ad, adlen, npub, k); + + *clen = mlen + TAG_INBYTES; + return 0; +} + + + +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k + ) +{ + /* + ... + ... the code for the AEAD implementation goes here, + ... generating a plaintext m[0],m[1],...,m[*mlen-1] + ... and secret message number nsec[0],nsec[1],... + ... from a ciphertext c[0],c[1],...,c[clen-1] + ... and associated data ad[0],ad[1],...,ad[adlen-1] + ... and nonce number npub[0],npub[1],... + ... and secret key k[0],k[1],... + ... + ... 
return 0; + */ + unsigned long long mlen_; + unsigned char tag_is_match; + /* A ciphertext shorter than CRYPTO_ABYTES is rejected with -1; on a tag mismatch m is zeroed and -1 is returned. */ + (void)nsec; + if (clen < CRYPTO_ABYTES) { + return -1; + } + mlen_ = clen - CRYPTO_ABYTES; + /* NOTE(review): the crypto_aead_decrypt_asm prototype takes mlen_ and adlen as unsigned char (low 8 bits only) and returns int; the result is kept in an unsigned char and only tested against zero. */ + tag_is_match = crypto_aead_decrypt_asm(m, c, mlen_, ad, adlen, npub, k); + + if (tag_is_match != 0) + { + memset(m, 0, (size_t)mlen_); + return -1; + } + + *mlen = mlen_; + return 0; +} \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot512/avr8_lowrom/encrypt_core.S b/knot/Implementations/crypto_hash/knot512/avr8_lowrom/encrypt_core.S new file mode 100644 index 0000000..cb7aed5 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot512/avr8_lowrom/encrypt_core.S @@ -0,0 +1,537 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.0 2020 by KNOT Team * +; ********************************************** +; + +; +; ============================================ +; S R A M D E F I N I T I O N S +; ============================================ +; +#include +#include "config.h" + +.section .noinit + SRAM_STATE: .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#if (STATE_INBYTES > 32) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif +#if (STATE_INBYTES > 48) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif + SRAM_MESSAGE_OUT_ADDR: .BYTE 0, 0 + SRAM_MESSAGE_IN_ADDR: .BYTE 0, 0 + SRAM_MESSAGE_IN_LEN: .BYTE 0, 0 +#ifdef CRYPTO_AEAD +; For CRYPTO_AEAD + SRAM_ASSOCIATED_DATA_ADDR: .BYTE 0, 0 + SRAM_ADLEN: .BYTE 0, 0 + SRAM_NONCE_ADDR: .BYTE 0, 0 + SRAM_KEY_ADDR: .BYTE 0, 0 + + SRAM_ADDITIONAL: + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#if (CRYPTO_ABYTES > 16) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif +#if (CRYPTO_ABYTES > 24) + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 +#endif + 
+#endif + +.section .text + +#include "permutation.h" + +; require YH:YL be the address of the current associated data/cipher/message block +; for enc and dec, store ciphertext or plaintext +; require ZH:ZL be the address of the current cipher/message block +XOR_to_State: + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + mov cnt0, rate +XOR_to_State_loop: + ld tmp0, Y+ ; plaintext/ciphertext + ld tmp1, X ; state + eor tmp1, tmp0 ; ciphertext/plaintext + sbrc AEDH, 0 ; test auth or enc/dec, if AEDH[0] == 0, skip store result + st Z+, tmp1 ; store ciphertext/plaintext + sbrc AEDH, 1 ; test auth/enc or dec, if AEDH[1] == 0, skip replacing state byte + mov tmp1, tmp0 ; if dec, replace state + st X+, tmp1 ; store state byte + dec cnt0 + brne XOR_to_State_loop +; YH:YL are now the address of the next associated data block +ret + +; require YH:YL pointed to the input data +; require ZH:ZL pointed to the output data +; require cnt0 contains the number of bytes in source data +; require number of bytes in source data less than rate, i.e., 0 <= cnt0 < rate +; +; the 0th bit in AEDH is used to distinguish (auth AD) or (enc/dec M/C): +; AEDH[0] = 0 for (auth AD), AEDH[0] = 1 for (enc/dec M/C) +; the 1st bit in AEDH is used to distinguish (auth AD/enc M) or (dec C): +; AEDH[1] = 0 for (auth AD/enc M), AEDH[1] = 1 for (dec C) +; AEDH = 0b000 for (auth AD) +; AEDH = 0b001 for (enc M) +; AEDH = 0b011 for (dec C) +Pad_XOR_to_State: + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + tst cnt0 + breq XOR_padded_data +XOR_source_data_loop: + ld tmp0, Y+ ; plaintext/ciphertext + ld tmp1, X ; state + eor tmp1, tmp0 ; ciphertext/plaintext + sbrc AEDH, 0 ; test auth or enc/dec, if AEDH[0] == 0, skip store result + st Z+, tmp1 ; store ciphertext/plaintext + sbrc AEDH, 1 ; test auth/enc or dec, if AEDH[1] == 0, skip replacing state byte + mov tmp1, tmp0 ; if dec, replace state + st X+, tmp1 ; store state byte + dec cnt0 + brne XOR_source_data_loop +XOR_padded_data: + ldi tmp0, PAD_BITS + 
ld tmp1, X + eor tmp1, tmp0 + st X, tmp1 +ret + +AddDomain: + ldi XH, hi8(SRAM_STATE + STATE_INBYTES - 1) + ldi XL, lo8(SRAM_STATE + STATE_INBYTES - 1) + ldi tmp0, DOMAIN_BITS + ld tmp1, X + eor tmp0, tmp1 + st X, tmp0 +ret + +; require ZH:ZL be the address of the destination +EXTRACT_from_State: + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + mov tmp1, rate +EXTRACT_from_State_loop: + ld tmp0, X+ + st Z+, tmp0 + dec tmp1 + brne EXTRACT_from_State_loop +ret + +AUTH: + tst radlen + breq AUTH_end + + cp radlen, rate + brlo auth_ad_padded_block + +auth_ad_loop: + rcall XOR_to_State + rcall Permutation + sub radlen, rate + cp radlen, rate + brlo auth_ad_padded_block + rjmp auth_ad_loop + +auth_ad_padded_block: + mov cnt0, radlen + rcall Pad_XOR_to_State + rcall Permutation + +AUTH_end: +ret + +#ifdef CRYPTO_AEAD +Initialization: + ldi rn, NR_0 + ldi XL, lo8(SRAM_STATE) + ldi XH, hi8(SRAM_STATE) + + lds YH, SRAM_NONCE_ADDR + lds YL, SRAM_NONCE_ADDR + 1 + ldi cnt0, CRYPTO_NPUBBYTES +load_nonce_loop: + ld tmp0, Y+ + st X+, tmp0 + dec cnt0 + brne load_nonce_loop + + lds YH, SRAM_KEY_ADDR + lds YL, SRAM_KEY_ADDR + 1 + ldi cnt0, CRYPTO_KEYBYTES +load_key_loop: + ld tmp0, Y+ + st X+, tmp0 + dec cnt0 + brne load_key_loop + +#if (STATE_INBITS==384) && (RATE_INBITS==192) + ldi cnt0, (STATE_INBYTES - CRYPTO_NPUBBYTES - CRYPTO_KEYBYTES - 1) + clr tmp0 +empty_state_loop: + st X+, tmp0 + dec cnt0 + brne empty_state_loop + ldi tmp0, S384_R192_BITS + st X+, tmp0 +#endif + + rcall Permutation +ret + +ENC: + tst mclen + breq ENC_end + + cp mclen, rate + brlo enc_padded_block + +enc_loop: + rcall XOR_to_State + ldi rn, NR_i + rcall Permutation + sub mclen, rate + cp mclen, rate + brlo enc_padded_block + rjmp enc_loop + +enc_padded_block: + mov cnt0, mclen + rcall Pad_XOR_to_State +ENC_end: +ret + +Finalization: + ldi rate, SQUEEZE_RATE_INBYTES + ldi rn, NR_f + rcall Permutation + rcall EXTRACT_from_State +ret + +; void crypto_aead_encrypt_asm( +; unsigned char *c, +; const unsigned 
char *m, +; unsigned long long mlen, +; const unsigned char *ad, +; unsigned long long adlen, +; const unsigned char *npub, +; const unsigned char *k +; ) +; +; unsigned char *c, is passed in r24:r25 +; const unsigned char *m, is passed in r22:r23 +; unsigned long long mlen, is passed in r20:r21, only LSB (r20) is used +; const unsigned char *ad, is passed in r18:r19 +; unsigned long long adlen, is passed in r16:r17, only LSB (r16) is used +; const unsigned char *npub, is passed in r14:r15 +; const unsigned char *k is passed in r12:r13 +.global crypto_aead_encrypt_asm +crypto_aead_encrypt_asm: + PUSH_ALL + ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR) + ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR) + st X+, r25 ;store cipher address in SRAM_MESSAGE_OUT_ADDR + st X+, r24 + st X+, r23 ;store message address in SRAM_MESSAGE_IN_ADDR + st X+, r22 + st X+, r21 ;store message length in SRAM_MESSAGE_IN_LEN + st X+, r20 + st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR + st X+, r18 + st X+, r17 ;store associated data length in SRAM_ADLEN + st X+, r16 + st X+, r15 ;store nonce address in SRAM_NONCE_ADDR + st X+, r14 + st X+, r13 ;store key address in SRAM_KEY_ADDR + st X+, r12 + mov radlen, r16 + mov mclen, r20 + + rcall Initialization + + ldi rn, NR_i + ldi rate, RATE_INBYTES + ldi AEDH, 0b000 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C) + lds YH, SRAM_ASSOCIATED_DATA_ADDR + lds YL, SRAM_ASSOCIATED_DATA_ADDR + 1 + rcall AUTH + rcall AddDomain + ldi AEDH, 0b001 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C) + lds YH, SRAM_MESSAGE_IN_ADDR + lds YL, SRAM_MESSAGE_IN_ADDR + 1 + lds ZH, SRAM_MESSAGE_OUT_ADDR + lds ZL, SRAM_MESSAGE_OUT_ADDR + 1 + rcall ENC + rcall Finalization + POP_ALL +ret + +; int crypto_aead_decrypt_asm( +; unsigned char *m, +; const unsigned char *c, +; unsigned long long clen, +; const unsigned char *ad, +; unsigned long long adlen, +; const unsigned char *npub, +; const unsigned char *k 
+; ) +; +; unsigned char *m, is passed in r24:r25 +; const unsigned char *c, is passed in r22:r23 +; unsigned long long clen, is passed in r20:r21, only LSB (r20) is used +; const unsigned char *ad, is passed in r18:r19 +; unsigned long long adlen, is passed in r16:r17, only LSB (r16) is used +; const unsigned char *npub, is passed in r14:r15 +; const unsigned char *k is passed in r12:r13 +.global crypto_aead_decrypt_asm +crypto_aead_decrypt_asm: + PUSH_ALL + ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR) + ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR) + st X+, r25 ;store message address in SRAM_MESSAGE_OUT_ADDR + st X+, r24 + st X+, r23 ;store cipher address in SRAM_MESSAGE_IN_ADDR + st X+, r22 + st X+, r21 ;store cipher length in SRAM_MESSAGE_IN_LEN + st X+, r20 + st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR + st X+, r18 + st X+, r17 ;store associated data length in SRAM_ADLEN + st X+, r16 + st X+, r15 ;store nonce address in SRAM_NONCE_ADDR + st X+, r14 + st X+, r13 ;store key address in SRAM_KEY_ADDR + st X+, r12 + mov radlen, r16 + mov mclen, r20 + + rcall Initialization + + ldi rn, NR_i + ldi rate, RATE_INBYTES + ldi AEDH, 0b000 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C) + lds YH, SRAM_ASSOCIATED_DATA_ADDR + lds YL, SRAM_ASSOCIATED_DATA_ADDR + 1 + rcall AUTH + rcall AddDomain + ldi AEDH, 0b011 ; AEDH = 0b000 for (auth AD), AEDH = 0b001 for (enc M), AEDH = 0b011 for (dec C) + lds YH, SRAM_MESSAGE_IN_ADDR + lds YL, SRAM_MESSAGE_IN_ADDR + 1 + lds ZH, SRAM_MESSAGE_OUT_ADDR + lds ZL, SRAM_MESSAGE_OUT_ADDR + 1 + rcall ENC + + ldi ZH, hi8(SRAM_ADDITIONAL) + ldi ZL, lo8(SRAM_ADDITIONAL) + rcall Finalization + + sbiw ZL, CRYPTO_ABYTES + ldi cnt0, CRYPTO_ABYTES +compare_tag: + ld tmp0, Z+ + ld tmp1, Y+ + cp tmp0, tmp1 + brne return_tag_not_match + dec cnt0 + brne compare_tag + rjmp return_tag_match + +return_tag_not_match: + ldi r25, 0xFF + ldi r24, 0xFF + rjmp crypto_aead_decrypt_end + +return_tag_match: + clr r25 + clr r24 
+crypto_aead_decrypt_end: + POP_ALL +ret + +; #ifdef CRYPTO_AEAD +#endif + + +#ifdef CRYPTO_HASH + +; void crypto_hash_asm( +; unsigned char *out, +; const unsigned char *in, +; unsigned long long inlen +; ) +; +; unsigned char *out, is passed in r24:r25 +; const unsigned char *in, is passed in r22:r23 +; unsigned long long inlen, is passed in r20:r21, only LSB (r20) is used +.global crypto_hash_asm +crypto_hash_asm: + PUSH_ALL + ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR) + ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR) + st X+, r25 ;store message address in SRAM_MESSAGE_OUT_ADDR + st X+, r24 + st X+, r23 ;store cipher address in SRAM_MESSAGE_IN_ADDR + st X+, r22 + st X+, r21 ;store cipher length in SRAM_MESSAGE_IN_LEN + st X+, r20 + mov mclen, r20 + + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) +#if (STATE_INBITS==384) && (HASH_RATE_INBITS==128) + ldi cnt0, STATE_INBYTES - 1 +#else + ldi cnt0, STATE_INBYTES +#endif + clr tmp0 +zero_state: + st X+, tmp0 + dec cnt0 + brne zero_state + +#if (STATE_INBITS==384) && (HASH_RATE_INBITS==128) + ldi tmp0, S384_R192_BITS + st X+, tmp0 +#endif + + ldi rn, NR_h + ldi AEDH, 0b100 + +HASH_ABSORBING: + mov radlen, mclen + tst radlen + breq EMPTY_M + ldi rate, HASH_RATE_INBYTES + lds YH, SRAM_MESSAGE_IN_ADDR + lds YL, SRAM_MESSAGE_IN_ADDR + 1 + rcall AUTH + rjmp HASH_SQUEEZING + +EMPTY_M: + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + ldi tmp0, PAD_BITS + ld tmp1, X + eor tmp1, tmp0 + st X, tmp1 + rcall Permutation + +HASH_SQUEEZING: + ldi rate, HASH_SQUEEZE_RATE_INBYTES + lds ZH, SRAM_MESSAGE_OUT_ADDR + lds ZL, SRAM_MESSAGE_OUT_ADDR + 1 + ldi tcnt, CRYPTO_BYTES +SQUEEZING_loop: + rcall EXTRACT_from_State + subi tcnt, HASH_SQUEEZE_RATE_INBYTES + breq HASH_SQUEEZING_end + rcall Permutation + rjmp SQUEEZING_loop +HASH_SQUEEZING_end: + POP_ALL +ret + +#endif + + +; Byte Order In AVR 8: +; KNOT-AEAD(128, 256, 64): +; N[ 0] AEAD_State[ 0] | Message[ 0] Perm_row_0[0] 0 Tag[ 0] +; N[ 1] AEAD_State[ 1] | Message[ 1] Perm_row_0[1] 0 Tag[ 1] +; N[ 
2] AEAD_State[ 2] | Message[ 2] Perm_row_0[2] 0 Tag[ 2] +; N[ 3] AEAD_State[ 3] | Message[ 3] Perm_row_0[3] 0 Tag[ 3] +; N[ 4] AEAD_State[ 4] | Message[ 4] 0x01 Perm_row_0[4] 0 Tag[ 4] +; N[ 5] AEAD_State[ 5] | Message[ 5] 0x00 Perm_row_0[5] 0 Tag[ 5] +; N[ 6] AEAD_State[ 6] | Message[ 6] 0x00 Perm_row_0[6] 0 Tag[ 6] +; N[ 7] AEAD_State[ 7] | Message[ 7] 0x00 Perm_row_0[7] <<< 0 Tag[ 7] +; N[ 8] AEAD_State[ 8] | Perm_row_1[0] 1 +; N[ 9] AEAD_State[ 9] | Perm_row_1[1] 1 +; N[10] AEAD_State[10] | Perm_row_1[2] 1 +; N[11] AEAD_State[11] | Perm_row_1[3] 1 +; N[12] AEAD_State[12] | Perm_row_1[4] 1 +; N[13] AEAD_State[13] | Perm_row_1[5] 1 +; N[14] AEAD_State[14] | Perm_row_1[6] 1 +; N[15] AEAD_State[15] | Perm_row_1[7] <<< 1 +; K[ 0] AEAD_State[16] | Perm_row_2[0] 8 +; K[ 1] AEAD_State[17] | Perm_row_2[1] 8 +; K[ 2] AEAD_State[18] | Perm_row_2[2] 8 +; K[ 3] AEAD_State[19] | Perm_row_2[3] 8 +; K[ 4] AEAD_State[20] | Perm_row_2[4] 8 +; K[ 5] AEAD_State[21] | Perm_row_2[5] 8 +; K[ 6] AEAD_State[22] | Perm_row_2[6] 8 +; K[ 7] AEAD_State[23] | Perm_row_2[7] <<< 8 +; K[ 8] AEAD_State[24] | Perm_row_3[0] 25 +; K[ 9] AEAD_State[25] | Perm_row_3[1] 25 +; K[10] AEAD_State[26] | Perm_row_3[2] 25 +; K[11] AEAD_State[27] | Perm_row_3[3] 25 +; K[12] AEAD_State[28] | Perm_row_3[4] 25 +; K[13] AEAD_State[29] | Perm_row_3[5] 25 +; K[14] AEAD_State[30] | Perm_row_3[6] 25 +; K[15] AEAD_State[31] | ^0x80 Perm_row_3[7] <<< 25 +; +; +; KNOT-AEAD(128, 384, 192): +; Initalization +; N[ 0] AEAD_State[ 0] | Message[ 0] Perm_row_0[ 0] 0 Tag[ 0] +; N[ 1] AEAD_State[ 1] | Message[ 1] Perm_row_0[ 1] 0 Tag[ 1] +; N[ 2] AEAD_State[ 2] | Message[ 2] Perm_row_0[ 2] 0 Tag[ 2] +; N[ 3] AEAD_State[ 3] | Message[ 3] Perm_row_0[ 3] 0 Tag[ 3] +; N[ 4] AEAD_State[ 4] | Message[ 4] 0x01 Perm_row_0[ 4] 0 Tag[ 4] +; N[ 5] AEAD_State[ 5] | Message[ 5] 0x00 Perm_row_0[ 5] 0 Tag[ 5] +; N[ 6] AEAD_State[ 6] | Message[ 6] 0x00 Perm_row_0[ 6] 0 Tag[ 6] +; N[ 7] AEAD_State[ 7] | Message[ 7] 0x00 Perm_row_0[ 7] 0 Tag[ 7] 
+; N[ 8] AEAD_State[ 8] | Message[ 8] 0x00 Perm_row_0[ 8] 0 Tag[ 8] +; N[ 9] AEAD_State[ 9] | Message[ 9] 0x00 Perm_row_0[ 9] 0 Tag[ 9] +; N[10] AEAD_State[10] | Message[10] 0x00 Perm_row_0[10] 0 Tag[10] +; N[11] AEAD_State[11] | Message[11] 0x00 Perm_row_0[11] <<< 0 Tag[11] +; N[12] AEAD_State[12] | Message[12] 0x00 Perm_row_1[ 0] 1 Tag[12] +; N[13] AEAD_State[13] | Message[13] 0x00 Perm_row_1[ 1] 1 Tag[13] +; N[14] AEAD_State[14] | Message[14] 0x00 Perm_row_1[ 2] 1 Tag[14] +; N[15] AEAD_State[15] | Message[15] 0x00 Perm_row_1[ 3] 1 Tag[15] +; K[ 0] AEAD_State[16] | Message[16] 0x00 Perm_row_1[ 4] 1 +; K[ 1] AEAD_State[17] | Message[17] 0x00 Perm_row_1[ 5] 1 +; K[ 2] AEAD_State[18] | Message[18] 0x00 Perm_row_1[ 6] 1 +; K[ 3] AEAD_State[19] | Message[19] 0x00 Perm_row_1[ 7] 1 +; K[ 4] AEAD_State[20] | Message[20] 0x00 Perm_row_1[ 8] 1 +; K[ 5] AEAD_State[21] | Message[21] 0x00 Perm_row_1[ 9] 1 +; K[ 6] AEAD_State[22] | Message[22] 0x00 Perm_row_1[10] 1 +; K[ 7] AEAD_State[23] | Message[23] 0x00 Perm_row_1[11] <<< 1 +; K[ 8] AEAD_State[24] | Perm_row_2[ 0] 8 +; K[ 9] AEAD_State[25] | Perm_row_2[ 1] 8 +; K[10] AEAD_State[26] | Perm_row_2[ 2] 8 +; K[11] AEAD_State[27] | Perm_row_2[ 3] 8 +; K[12] AEAD_State[28] | Perm_row_2[ 4] 8 +; K[13] AEAD_State[29] | Perm_row_2[ 5] 8 +; K[14] AEAD_State[30] | Perm_row_2[ 6] 8 +; K[15] AEAD_State[31] | Perm_row_2[ 7] 8 +; 0x00 AEAD_State[32] | Perm_row_2[ 8] 8 +; 0x00 AEAD_State[33] | Perm_row_2[ 9] 8 +; 0x00 AEAD_State[34] | Perm_row_2[10] 8 +; 0x00 AEAD_State[35] | Perm_row_2[11] <<< 8 +; 0x00 AEAD_State[36] | Perm_row_3[ 0] 55 +; 0x00 AEAD_State[37] | Perm_row_3[ 1] 55 +; 0x00 AEAD_State[38] | Perm_row_3[ 2] 55 +; 0x00 AEAD_State[39] | Perm_row_3[ 3] 55 +; 0x00 AEAD_State[40] | Perm_row_3[ 4] 55 +; 0x00 AEAD_State[41] | Perm_row_3[ 5] 55 +; 0x00 AEAD_State[42] | Perm_row_3[ 6] 55 +; 0x00 AEAD_State[43] | Perm_row_3[ 7] 55 +; 0x00 AEAD_State[44] | Perm_row_3[ 8] 55 +; 0x00 AEAD_State[45] | Perm_row_3[ 9] 55 +; 0x00 
AEAD_State[46] | Perm_row_3[10] 55 +; 0x00 ^0x80 AEAD_State[47] | ^0x80 Perm_row_3[11] <<< 55 diff --git a/knot/Implementations/crypto_hash/knot512/avr8_lowrom/hash.c b/knot/Implementations/crypto_hash/knot512/avr8_lowrom/hash.c new file mode 100644 index 0000000..dbbe4df --- /dev/null +++ b/knot/Implementations/crypto_hash/knot512/avr8_lowrom/hash.c @@ -0,0 +1,32 @@ +#include +#include +#include +#include +#include "api.h" +#include "crypto_hash.h" + +extern void crypto_hash_asm( + unsigned char *out, + const unsigned char *in, + unsigned char inlen + ); + +int crypto_hash( + unsigned char *out, + const unsigned char *in, + unsigned long long inlen +) +{ + /* + ... + ... the code for the hash function implementation goes here + ... generating a hash value out[0],out[1],...,out[CRYPTO_BYTES-1] + ... from a message in[0],in[1],...,in[in-1] + ... + ... return 0; + */ + + crypto_hash_asm(out, in, inlen); + + return 0; +} \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot512/avr8_lowrom/knot256.h b/knot/Implementations/crypto_hash/knot512/avr8_lowrom/knot256.h new file mode 100644 index 0000000..d16bf8c --- /dev/null +++ b/knot/Implementations/crypto_hash/knot512/avr8_lowrom/knot256.h @@ -0,0 +1,197 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.0 2020 by KNOT Team * +; ********************************************** +; +#include "assist.h" + +Permutation: + PUSH_CONFLICT + mov rcnt, rn + + ldi rc, 0x01 + ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES) + ld x30, Y+ + ld x31, Y+ + ld x32, Y+ + ld x33, Y+ + ld x34, Y+ + ld x35, Y+ + ld x36, Y+ + ld x37, Y+ + +round_loop_start: + rjmp AddRC_SubColumns_Start + +load_columns_table: + rjmp load_column0 + rjmp load_column1 + rjmp load_column2 + rjmp load_column3 + rjmp 
load_column4 + rjmp load_column5 + rjmp load_column6 + rjmp load_column7 + rjmp amend_shiftRow + +load_column0: + mov x3j, x30 + rjmp Sbox_one_column +load_column1: + mov x30, x3j + mov x3j, x31 + rjmp Sbox_one_column +load_column2: + mov x31, x3j + mov x3j, x32 + rjmp Sbox_one_column +load_column3: + mov x32, x3j + mov x3j, x33 + rjmp Sbox_one_column +load_column4: + mov x33, x3j + mov x3j, x34 + rjmp Sbox_one_column +load_column5: + mov x34, x3j + mov x3j, x35 + rjmp Sbox_one_column +load_column6: + mov x35, x3j + mov x3j, x36 + rjmp Sbox_one_column +load_column7: + mov x36, x3j + mov x3j, x37 + rjmp Sbox_one_column + +#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) +LFSR_table: + rjmp LFSR6 + rjmp LFSR7 +LFSR6: + LFSR6_MACRO + rjmp LFSR_DONE +LFSR7: + LFSR7_MACRO + rjmp LFSR_DONE +#endif + +;;;;;;;;;;;;;;;;;;;;;;;; Real Start +AddRC_SubColumns_Start: + ldi YH, hi8(SRAM_STATE) + ldi YL, lo8(SRAM_STATE) + clr ccnt + ld x0j, Y + eor x0j, rc + +#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) + ldi ZL, pm_lo8(LFSR_table) + ldi ZH, pm_hi8(LFSR_table) + sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[2] = 1 for HASH + adiw ZL, 1 + ijmp +LFSR_DONE: +#elif defined(CRYPTO_AEAD) + LFSR6_MACRO ; only AEAD +#else + LFSR7_MACRO ; only HASH +#endif + + ldd x1j, Y + ROW_INBYTES + ldd x2j, Y + 2 * ROW_INBYTES + ldi ZL, pm_lo8(load_columns_table) + ldi ZH, pm_hi8(load_columns_table) + ijmp +Sbox_one_column: + Sbox x0j, x1j, x2j, x3j + + ; 7 6 5 4 3 2 1 0 + ; -- -- -- -- -- -- -- x- 0 + ; -- -- -- -- -- -- -- x' 0 + ; -- -- -- -- -- -- x- -- 1 + ; -- -- -- -- x' -- -- -- 3 + ; 4 3 2 1 0 7 6 5 + ; Store a byte to Row 0 + st Y, x0j + ; Store a byte combined with ShiftRow1 + lsl t1j + mov t1j, x1j ; back up the last updated byte in t1j, to be used in shiftRow1 (1 bit left) + rol x1j + std Y + ROW_INBYTES, x1j + ; Store a byte combined with ShiftRow2 + inc ccnt + cpi ccnt, ROW_INBYTES + breq ROW2_WRAP + ldd t2j, Y + 2 * ROW_INBYTES + 1 ; load next byte, the last updated byte needed to 
be shifted to the address of the next bytes + std Y + 2 * ROW_INBYTES + 1, x2j + mov x2j, t2j + jmp NO_ROW2_WRAP +ROW2_WRAP: + std Y + ROW_INBYTES + 1, x2j + ; remain ShiftRow3 to be done at 'amend_shiftRow' +NO_ROW2_WRAP: + adiw YL, 1 + ld x0j, Y + ldd x1j, Y + ROW_INBYTES + + adiw ZL, 1 + ijmp + +amend_shiftRow: + ldi YH, hi8(SRAM_STATE + ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + ROW_INBYTES) + + ld x1j, Y + bst t1j, 7 + bld x1j, 0 + st Y, x1j + + ; <<< 1 + mov x37, x3j + rol x3j + rol x30 + rol x31 + rol x32 + rol x33 + rol x34 + rol x35 + rol x36 + rol x37 + ; <<< 24 + ; 7 6 5 4 3 2 1 0 => 4 3 2 1 0 7 6 5 + mov x3j, x30 + mov x30, x35 + mov x35, x32 + mov x32, x37 + mov x37, x34 + mov x34, x31 + mov x31, x36 + mov x36, x33 + mov x33, x3j + + dec rcnt + breq round_loop_end + rjmp round_loop_start + +round_loop_end: + ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES) + st Y+, x30 + st Y+, x31 + st Y+, x32 + st Y+, x33 + st Y+, x34 + st Y+, x35 + st Y+, x36 + st Y+, x37 + + POP_CONFLICT +ret \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot512/avr8_lowrom/knot384.h b/knot/Implementations/crypto_hash/knot512/avr8_lowrom/knot384.h new file mode 100644 index 0000000..65c474a --- /dev/null +++ b/knot/Implementations/crypto_hash/knot512/avr8_lowrom/knot384.h @@ -0,0 +1,219 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.0 2020 by KNOT Team * +; ********************************************** +; +#include "assist.h" + +Permutation: ; in-place permutation of the state at SRAM_STATE, rn rounds; Row 3 is held in x30..x3b for the whole call + PUSH_CONFLICT ; restored by POP_CONFLICT right before ret + mov rcnt, rn ; rcnt = remaining rounds; the loop decrements before testing, so rn is expected to be nonzero + + ldi rc, 0x01 ; first round constant + ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES) + ld x30, Y+ + ld x31, Y+ + ld x32, Y+ + ld x33, Y+ + ld x34, Y+ + ld x35, Y+ + ld x36, Y+ + ld x37, Y+ + ld x38, Y+ + ld x39, Y+ + ld x3a, Y+ + ld x3b, Y+
+ +round_loop_start: + rjmp AddRC_SubColumns_Start ; hop over the dispatch table and the column loaders + +load_columns_table: ; ijmp targets: Z starts here and is advanced by one word per column + rjmp load_column0 + rjmp load_column1 + rjmp load_column2 + rjmp load_column3 + rjmp load_column4 + rjmp load_column5 + rjmp load_column6 + rjmp load_column7 + rjmp load_column8 + rjmp load_column9 + rjmp load_columna + rjmp load_columnb + rjmp amend_shiftRow ; entry taken after the last column + +load_column0: + mov x3j, x30 + rjmp Sbox_one_column +load_column1: + mov x30, x3j ; write back the updated Row 3 byte of the previous column + mov x3j, x31 ; fetch the Row 3 byte of this column + rjmp Sbox_one_column +load_column2: + mov x31, x3j + mov x3j, x32 + rjmp Sbox_one_column +load_column3: + mov x32, x3j + mov x3j, x33 + rjmp Sbox_one_column +load_column4: + mov x33, x3j + mov x3j, x34 + rjmp Sbox_one_column +load_column5: + mov x34, x3j + mov x3j, x35 + rjmp Sbox_one_column +load_column6: + mov x35, x3j + mov x3j, x36 + rjmp Sbox_one_column +load_column7: + mov x36, x3j + mov x3j, x37 + rjmp Sbox_one_column +load_column8: + mov x37, x3j + mov x3j, x38 + rjmp Sbox_one_column +load_column9: + mov x38, x3j + mov x3j, x39 + rjmp Sbox_one_column +load_columna: + mov x39, x3j + mov x3j, x3a + rjmp Sbox_one_column +load_columnb: + mov x3a, x3j + mov x3j, x3b + rjmp Sbox_one_column + +;;;;;;;;;;;;;;;;;;;;;;;; Real Start +AddRC_SubColumns_Start: + ldi YH, hi8(SRAM_STATE) + ldi YL, lo8(SRAM_STATE) + ldi ZL, pm_lo8(load_columns_table) + ldi ZH, pm_hi8(load_columns_table) + clr ccnt + ld x0j, Y + eor x0j, rc ; the round constant is XORed into byte 0 of Row 0 only + LFSR7_MACRO ; advance rc for the next round (clobbers tmp0 and tmp1) + + ldd x1j, Y + ROW_INBYTES + ldd x2j, Y + 2 * ROW_INBYTES + ijmp +Sbox_one_column: + Sbox x0j, x1j, x2j, x3j + + ; b a 9 8 7 6 5 4 3 2 1 0 + ; -- -- -- -- -- -- -- -- -- -- -- x- 0 + ; -- -- -- -- -- -- -- -- -- -- -- x' 0 + ; -- -- -- -- -- -- -- -- -- -- x- -- 1 + ; -- -- -- -- x' -- -- -- -- -- -- -- 7 + ; 4 3 2 1 0 b a 9 8 7 6 5 + ; Store a byte to Row 0 + st Y, x0j + ; Store a byte combined with ShiftRow 1 + lsl t1j ; carry = bit 7 of the Row 1 byte saved by the previous column + mov t1j, x1j ; back up the last updated byte in t1j, to be used in shiftRow1 (1 bit left) + rol x1j + std Y + ROW_INBYTES, x1j + ; Store a byte combined with ShiftRow 2 + inc ccnt + cpi
ccnt, ROW_INBYTES + breq ROW2_WRAP + ldd t2j, Y + 2 * ROW_INBYTES + 1 ; save the next Row 2 byte before the store below overwrites it (ShiftRow 2 moves every byte up by one position) + std Y + 2 * ROW_INBYTES + 1, x2j + mov x2j, t2j + rjmp NO_ROW2_WRAP ; short forward hop: a one-word rjmp reaches it, no absolute jmp needed +ROW2_WRAP: + std Y + ROW_INBYTES + 1, x2j ; last column: Y is at byte ROW_INBYTES - 1 of Row 0, so this lands on byte 0 of Row 2 + ; remain ShiftRow3 to be done at 'amend_shiftRow' +NO_ROW2_WRAP: + adiw YL, 1 + ld x0j, Y + ldd x1j, Y + ROW_INBYTES + + adiw ZL, 1 + ijmp + +amend_shiftRow: + ldi YH, hi8(SRAM_STATE + ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + ROW_INBYTES) + + ld x1j, Y + bst t1j, 7 ; bit 7 of the last Row 1 byte wraps around + bld x1j, 0 ; into bit 0 of Row 1 byte 0 + st Y, x1j + + ; >>> 1 + mov x3b, x3j ; x3j still holds the updated Row 3 byte of column b + ror x3j ; carry = bit 0 of byte b, fed into byte a below + ror x3a + ror x39 + ror x38 + ror x37 + ror x36 + ror x35 + ror x34 + ror x33 + ror x32 + ror x31 + ror x30 + ror x3b + ; <<< 56 (byte permutation; together with the >>> 1 above the net rotation is 55 bits to the left) + ; b a 9 8 7 6 5 4 3 2 1 0 => 4 3 2 1 0 b a 9 8 7 6 5 + ;mov x3j, x30 + ;mov x30, x35 + ;mov x35, x32 + ;mov x32, x37 + ;mov x37, x34 + ;mov x34, x31 + ;mov x31, x36 + ;mov x36, x33 + ;mov x33, x3j + mov x3j, x30 + mov x30, x35 + mov x35, x3a + mov x3a, x33 + mov x33, x38 + mov x38, x31 + mov x31, x36 + mov x36, x3b + mov x3b, x34 + mov x34, x39 + mov x39, x32 + mov x32, x37 + mov x37, x3j + + dec rcnt + breq round_loop_end + rjmp round_loop_start + +round_loop_end: + + ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES) + st Y+, x30 + st Y+, x31 + st Y+, x32 + st Y+, x33 + st Y+, x34 + st Y+, x35 + st Y+, x36 + st Y+, x37 + st Y+, x38 + st Y+, x39 + st Y+, x3a + st Y+, x3b + + POP_CONFLICT +ret \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot512/avr8_lowrom/knot512.h b/knot/Implementations/crypto_hash/knot512/avr8_lowrom/knot512.h new file mode 100644 index 0000000..d24b353 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot512/avr8_lowrom/knot512.h @@ -0,0 +1,275 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for
8-bit AVR CPU * +; * Version 1.0 2020 by KNOT Team * +; ********************************************** +; +#include "assist.h" + +Permutation: ; in-place permutation of the state at SRAM_STATE, rn rounds; Row 3 is held in x30..x3f for the whole call + PUSH_CONFLICT ; restored by POP_CONFLICT right before ret + mov rcnt, rn ; rcnt = remaining rounds; the loop decrements before testing, so rn is expected to be nonzero + + ldi rc, 0x01 ; first round constant + ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES) + ld x30, Y+ + ld x31, Y+ + ld x32, Y+ + ld x33, Y+ + ld x34, Y+ + ld x35, Y+ + ld x36, Y+ + ld x37, Y+ + ld x38, Y+ + ld x39, Y+ + ld x3a, Y+ + ld x3b, Y+ + ld x3c, Y+ + ld x3d, Y+ + ld x3e, Y+ + ld x3f, Y+ + +round_loop_start: + rjmp AddRC_SubColumns_Start ; hop over the dispatch tables and the column loaders + +load_columns_table: ; ijmp targets: Z starts here and is advanced by one word per column + rjmp load_column0 + rjmp load_column1 + rjmp load_column2 + rjmp load_column3 + rjmp load_column4 + rjmp load_column5 + rjmp load_column6 + rjmp load_column7 + rjmp load_column8 + rjmp load_column9 + rjmp load_columna + rjmp load_columnb + rjmp load_columnc + rjmp load_columnd + rjmp load_columne + rjmp load_columnf + rjmp amend_shiftRow ; entry taken after the last column + +load_column0: + mov x3j, x30 + rjmp Sbox_one_column +load_column1: + mov x30, x3j ; write back the updated Row 3 byte of the previous column + mov x3j, x31 ; fetch the Row 3 byte of this column + rjmp Sbox_one_column +load_column2: + mov x31, x3j + mov x3j, x32 + rjmp Sbox_one_column +load_column3: + mov x32, x3j + mov x3j, x33 + rjmp Sbox_one_column +load_column4: + mov x33, x3j + mov x3j, x34 + rjmp Sbox_one_column +load_column5: + mov x34, x3j + mov x3j, x35 + rjmp Sbox_one_column +load_column6: + mov x35, x3j + mov x3j, x36 + rjmp Sbox_one_column +load_column7: + mov x36, x3j + mov x3j, x37 + rjmp Sbox_one_column +load_column8: + mov x37, x3j + mov x3j, x38 + rjmp Sbox_one_column +load_column9: + mov x38, x3j + mov x3j, x39 + rjmp Sbox_one_column +load_columna: + mov x39, x3j + mov x3j, x3a + rjmp Sbox_one_column +load_columnb: + mov x3a, x3j + mov x3j, x3b + rjmp Sbox_one_column +load_columnc: + mov x3b, x3j + mov x3j, x3c + rjmp Sbox_one_column +load_columnd: + mov x3c, x3j + mov x3j, x3d + rjmp Sbox_one_column +load_columne: + mov x3d, x3j + mov x3j, x3e + rjmp Sbox_one_column +load_columnf: + mov x3e, x3j + mov x3j, x3f + rjmp Sbox_one_column + +#if
defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) +LFSR_table: + rjmp LFSR7 + rjmp LFSR8 +LFSR7: + LFSR7_MACRO + rjmp LFSR_DONE +LFSR8: + LFSR8_MACRO + rjmp LFSR_DONE +#endif + +;;;;;;;;;;;;;;;;;;;;;;;; Real Start +AddRC_SubColumns_Start: + ldi YH, hi8(SRAM_STATE) + ldi YL, lo8(SRAM_STATE) + clr ccnt + ld x0j, Y + eor x0j, rc ; the round constant is XORed into byte 0 of Row 0 only + +#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH) + ldi ZL, pm_lo8(LFSR_table) + ldi ZH, pm_hi8(LFSR_table) + sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[2] = 1 for HASH + adiw ZL, 1 ; HASH only: select the second table entry (LFSR8) + ijmp +LFSR_DONE: +#elif defined(CRYPTO_AEAD) + LFSR7_MACRO ; only AEAD +#else + LFSR8_MACRO ; only HASH +#endif + + ldd x1j, Y + ROW_INBYTES + ldd x2j, Y + 2 * ROW_INBYTES + ldd t2j, Y + 2 * ROW_INBYTES + 1 ; loaded after the LFSR step because t2j and tmp1 are the same register + ldi ZL, pm_lo8(load_columns_table) + ldi ZH, pm_hi8(load_columns_table) + ijmp +Sbox_one_column: + Sbox x0j, x1j, x2j, x3j + + ; f e d c b a 9 8 7 6 5 4 3 2 1 0 + ; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x- 0 + ; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x' 0 + ; -- -- -- -- -- -- -- -- -- -- -- -- -- x- -- -- 2 + ; -- -- -- -- -- -- -- -- -- -- -- -- x' -- -- -- 3 + ; c b a 9 8 7 6 5 4 3 2 1 0 f e d + ; Store a byte to Row 0 + st Y, x0j + ; Store a byte combined with ShiftRow1 + lsl t1j ; carry = bit 7 of the Row 1 byte saved by the previous column + mov t1j, x1j ; back up the last updated byte in t1j, to be used in shiftRow1 (1 bit left) + rol x1j + std Y + ROW_INBYTES, x1j + ; Store a byte combined with ShiftRow2 + inc ccnt + cpi ccnt, ROW_INBYTES - 1 + brsh ROW2_WRAP + ldd tmp0, Y + 2 * ROW_INBYTES + 2 ; save the Row 2 byte two positions ahead before the store below overwrites it (ShiftRow 2 moves every byte up by two positions) + std Y + 2 * ROW_INBYTES + 2, x2j + mov x2j, t2j + mov t2j, tmp0 + rjmp NO_ROW2_WRAP ; short forward hop: a one-word rjmp reaches it, no absolute jmp needed +ROW2_WRAP: + std Y + ROW_INBYTES + 2, x2j ; last two columns: the target wraps around to byte 0 or byte 1 of Row 2 + mov x2j, t2j + + ; remain ShiftRow3 to be done at 'amend_shiftRow' +NO_ROW2_WRAP: + adiw YL, 1 + ld x0j, Y + ldd x1j, Y + ROW_INBYTES + + adiw ZL, 1 + ijmp + +amend_shiftRow: + ldi YH, hi8(SRAM_STATE + ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + ROW_INBYTES) + + ld x1j, Y + bst t1j, 7 ; bit 7 of the last Row 1 byte wraps around + bld
x1j, 0 ; into bit 0 of Row 1 byte 0 + st Y, x1j + + ; <<< 1 + mov x3f, x3j ; x3j still holds the updated Row 3 byte of column f + rol x3j ; carry = bit 7 of byte f, fed into byte 0 below + rol x30 + rol x31 + rol x32 + rol x33 + rol x34 + rol x35 + rol x36 + rol x37 + rol x38 + rol x39 + rol x3a + rol x3b + rol x3c + rol x3d + rol x3e + rol x3f + ; <<< 24 (byte permutation; together with the <<< 1 above the net rotation is 25 bits to the left) + ; f e d c b a 9 8 7 6 5 4 3 2 1 0 => + ; c b a 9 8 7 6 5 4 3 2 1 0 f e d + mov x3j, x30 + mov x30, x3d + mov x3d, x3a + mov x3a, x37 + mov x37, x34 + mov x34, x31 + mov x31, x3e + mov x3e, x3b + mov x3b, x38 + mov x38, x35 + mov x35, x32 + mov x32, x3f + mov x3f, x3c + mov x3c, x39 + mov x39, x36 + mov x36, x33 + mov x33, x3j + + dec rcnt + breq round_loop_end + rjmp round_loop_start + +round_loop_end: + + ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES) + ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES) + st Y+, x30 + st Y+, x31 + st Y+, x32 + st Y+, x33 + st Y+, x34 + st Y+, x35 + st Y+, x36 + st Y+, x37 + st Y+, x38 + st Y+, x39 + st Y+, x3a + st Y+, x3b + st Y+, x3c + st Y+, x3d + st Y+, x3e + st Y+, x3f + + POP_CONFLICT +ret \ No newline at end of file diff --git a/knot/Implementations/crypto_hash/knot512/avr8_lowrom/permutation.h b/knot/Implementations/crypto_hash/knot512/avr8_lowrom/permutation.h new file mode 100644 index 0000000..a57c5d3 --- /dev/null +++ b/knot/Implementations/crypto_hash/knot512/avr8_lowrom/permutation.h @@ -0,0 +1,109 @@ +; +; ********************************************** +; * KNOT: a family of bit-slice lightweight * +; * authenticated encryption algorithms * +; * and hash functions * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.0 2020 by KNOT Team * +; ********************************************** +; + +; +; ============================================ +; R E G I S T E R D E F I N I T I O N S +; ============================================ +; + +#define mclen r16 +#define radlen r17 +#define tcnt r17 +#define tmp0 r20 +#define tmp1 r21 +#define cnt0 r22 +#define rn r23 +#define rate r24 + +; +; ; AEDH = 0b000: for authenticate AD +; ; AEDH = 0b001: for encryption +; ; AEDH = 0b011: for decryption
+; ; AEDH = 0b100: for hash +; #define AEDH r25 ; Register used globally within this program +; Annotated register map, for reference only: the live definitions further down carry no trailing comments because the preprocessor would copy them into every macro expansion +; #define x30 r0 ; Register used without overlapping +; #define x31 r1 ; Register used without overlapping +; #define x32 r2 ; Register used without overlapping +; #define x33 r3 ; Register used without overlapping +; #define x34 r4 ; Register used without overlapping +; #define x35 r5 ; Register used without overlapping +; #define x36 r6 ; Register used without overlapping +; #define x37 r7 ; Register used without overlapping +; #define x38 r8 ; Register used without overlapping +; #define x39 r9 ; Register used without overlapping +; #define x3a r10 ; Register used without overlapping +; #define x3b r11 ; Register used without overlapping +; #define x3c r12 ; Register used without overlapping +; #define x3d r13 ; Register used without overlapping +; #define x3e r14 ; Register used without overlapping +; #define x3f r15 ; Register used without overlapping +; +; #define x0j r16 ; Register used overlapped, should be backed up before using +; #define x1j r17 ; Register used overlapped, should be backed up before using +; #define x2j r18 ; Register used overlapped, should be backed up before using +; #define x3j r19 ; Register used overlapped, should be backed up before using +; +; ; t2j holds one byte of Row2 ahead of the current column in knot512 (Row2 rotates by 16 bits); +; ; it shares r21 with tmp1, which the LFSR macros overwrite, so it is loaded only after the LFSR step +; #define t2j r21 ; Temporary register, used freely +; #define t1j r22 ; Temporary register, used freely +; #define t3j r23 ; Temporary register, used freely +; +; #define rc r24 ; Register used overlapped, should be backed up before using +; #define rcnt r26 ; Register used overlapped, should be backed up before using +; #define ccnt r27 ; Register used overlapped, should be backed up before using + +#define AEDH r25 +#define x30 r0 +#define x31 r1 +#define x32 r2 +#define x33 r3 +#define x34 r4 +#define x35 r5 +#define x36 r6 +#define x37 r7
+#define x38 r8 +#define x39 r9 +#define x3a r10 +#define x3b r11 +#define x3c r12 +#define x3d r13 +#define x3e r14 +#define x3f r15 + +#define x0j r16 +#define x1j r17 +#define x2j r18 +#define x3j r19 + +; t2j holds one byte of Row2 ahead of the current column in knot512 (Row2 rotates by 16 bits); +; it shares r21 with tmp1, which the LFSR macros overwrite, so it is loaded only after the LFSR step +#define t2j r21 +#define t1j r22 +#define t3j r23 + +#define rc r24 +#define rcnt r26 +#define ccnt r27 + +#if (STATE_INBITS==256) +#include "knot256.h" +#elif (STATE_INBITS==384) +#include "knot384.h" +#elif (STATE_INBITS==512) +#include "knot512.h" +#else +#error "Not specified key size and state size" +#endif + +