From 109ff80f6482b99aa1418c86b2ebf5114f94bbb1 Mon Sep 17 00:00:00 2001 From: 包珍珍 Date: Fri, 11 Sep 2020 07:21:49 +0000 Subject: [PATCH] photon-beetle --- photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/avr8_speed/api.h | 5 +++++ photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/avr8_speed/assist.h | 140 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/avr8_speed/crypto_aead.h | 26 ++++++++++++++++++++++++++ photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/avr8_speed/encrypt.c | 98 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/avr8_speed/encrypt_core.S | 733 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/avr8_speed/photon.h | 714 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/avr8_speed/api.h | 5 +++++ photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/avr8_speed/assist.h | 140 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/avr8_speed/crypto_aead.h | 26 ++++++++++++++++++++++++++ photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/avr8_speed/encrypt.c | 98 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/avr8_speed/encrypt_core.S | 734 
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/avr8_speed/photon.h | 714 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/avr8_speed/api.h | 1 + photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/avr8_speed/assist.h | 140 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/avr8_speed/crypto_hash.h | 13 +++++++++++++ photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/avr8_speed/encrypt_core.S | 734 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/avr8_speed/hash.c | 32 ++++++++++++++++++++++++++++++++ photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/avr8_speed/photon.h | 714 
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 18 files changed, 5067 insertions(+) create mode 100644 photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/avr8_speed/api.h create mode 100644 photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/avr8_speed/assist.h create mode 100644 photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/avr8_speed/crypto_aead.h create mode 100644 photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/avr8_speed/encrypt.c create mode 100644 photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/avr8_speed/encrypt_core.S create mode 100644 photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/avr8_speed/photon.h create mode 100644 photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/avr8_speed/api.h create mode 100644 photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/avr8_speed/assist.h create mode 100644 photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/avr8_speed/crypto_aead.h create mode 100644 photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/avr8_speed/encrypt.c create mode 100644 photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/avr8_speed/encrypt_core.S create mode 100644 photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/avr8_speed/photon.h create mode 100644 photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/avr8_speed/api.h create mode 100644 photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/avr8_speed/assist.h create mode 100644 photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/avr8_speed/crypto_hash.h create mode 100644 photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/avr8_speed/encrypt_core.S create mode 100644 photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/avr8_speed/hash.c create mode 100644 photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/avr8_speed/photon.h diff --git a/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/avr8_speed/api.h b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/avr8_speed/api.h new file mode 100644 index 0000000..4ceda96 --- /dev/null +++ b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/avr8_speed/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 diff --git a/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/avr8_speed/assist.h b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/avr8_speed/assist.h new file mode 100644 index 0000000..fdf3c68 --- /dev/null +++ 
b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/avr8_speed/assist.h @@ -0,0 +1,140 @@ +; +; ********************************************** +; * PHOTON-Beetle * +; * Authenticated Encryption and Hash Family * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.0 2020 by PHOTON-Beetle Team * +; ********************************************** +; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Bitslice +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +.MACRO Reorder_8_bits i0, i1, i2, i3, i4 + ror \i0 + ror \i1 + ror \i0 + ror \i2 + ror \i0 + ror \i3 + ror \i0 + ror \i4 + ror \i0 + ror \i1 + ror \i0 + ror \i2 + ror \i0 + ror \i3 + ror \i0 + ror \i4 +.ENDM + +.MACRO InvReorder_8_bits i0, i1, i2, i3, i4 + ror \i1 + ror \i0 + ror \i2 + ror \i0 + ror \i3 + ror \i0 + ror \i4 + ror \i0 + ror \i1 + ror \i0 + ror \i2 + ror \i0 + ror \i3 + ror \i0 + ror \i4 + ror \i0 +.ENDM + +; require XH:XL be the address of the input +Load_Reorder_32_bits: + ldi cnt1, 4 +reorder_8_bits_loop: + ld rmp, X+ + Reorder_8_bits rmp, x0, x1, x2, x3 + dec cnt1 + brne reorder_8_bits_loop +ret + +; require YH:YL be the address of the output +invReorder_Store_32_bits: + ldi cnt1, 4 +invreorder_8_bits_loop: + InvReorder_8_bits rmp, x0, x1, x2, x3 + st Y+, rmp + dec cnt1 + brne invreorder_8_bits_loop +ret + +; require XH:XL be the address of the input +; require YH:YL be the address of the output +Load_Reorder_Store_128_bits: + ldi cnt0, 4 +reorder_32_bits_loop: + rcall Load_Reorder_32_bits + st Y+, x0 + st Y+, x1 + st Y+, x2 + st Y+, x3 + dec cnt0 + brne reorder_32_bits_loop +ret + +; require XH:XL be the address of the input +; require YH:YL be the address of the output +Load_invReorder_Store_128_bits: + ldi cnt0, 4 +invreorder_32_bits_loop: + ld x0, X+ + ld x1, X+ + ld x2, X+ + ld x3, X+ + rcall invReorder_Store_32_bits + dec cnt0 + brne invreorder_32_bits_loop +ret + +.macro PUSH_ALL +push r2 +push r3 +push r4 +push r5 +push r6 +push r7 +push r8 +push r9 +push r10 +push r11 +push r12 +push r13 +push r14 +push r15 +push r16 +push r17 +push r28 +push r29 +.endm + +.macro POP_ALL +pop r29 +pop r28 +pop r17 +pop r16 +pop r15 +pop r14 +pop r13 +pop r12 +pop r11 +pop r10 +pop r9 +pop r8 +pop r7 +pop r6 +pop r5 +pop r4 +pop r3 +pop r2 +clr r1 +.endm diff --git a/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/avr8_speed/crypto_aead.h b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/avr8_speed/crypto_aead.h new file mode 100644 index 0000000..cd820d3 --- /dev/null +++ b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/avr8_speed/crypto_aead.h @@ -0,0 +1,26 @@ +#ifdef __cplusplus +extern "C" { +#endif + +int crypto_aead_encrypt( + unsigned char *c,unsigned long long *clen, + const unsigned char *m,unsigned long long mlen, + const unsigned char *ad,unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k + ); + + +int crypto_aead_decrypt( + unsigned char *m,unsigned long long *outputmlen, + unsigned char *nsec, + const unsigned char *c,unsigned long long clen, + const unsigned char *ad,unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k + ); + +#ifdef __cplusplus +} +#endif diff --git a/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/avr8_speed/encrypt.c b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/avr8_speed/encrypt.c new file mode 100644 index 0000000..31bc9c4 --- /dev/null +++ 
b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/avr8_speed/encrypt.c @@ -0,0 +1,98 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> /* bracketed header names were lost in extraction; string.h (for memset) is the one this file strictly needs */ +#include "api.h" + +extern void crypto_aead_encrypt_asm( + unsigned char *c, + const unsigned char *m, + unsigned char mlen, + const unsigned char *ad, + unsigned char adlen, + const unsigned char *npub, + const unsigned char *k + ); + +extern char crypto_aead_decrypt_asm( + unsigned char *m, + const unsigned char *c, + unsigned char clen, + const unsigned char *ad, + unsigned char adlen, + const unsigned char *npub, + const unsigned char *k + ); + +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k + ) +{ + /* + ... + ... the code for the cipher implementation goes here, + ... generating a ciphertext c[0],c[1],...,c[*clen-1] + ... from a plaintext m[0],m[1],...,m[mlen-1] + ... and associated data ad[0],ad[1],...,ad[adlen-1] + ... and nonce npub[0],npub[1],... + ... and secret key k[0],k[1],... + ... the implementation shall not use nsec + ... + ... return 0; + */ + + (void)nsec; + + crypto_aead_encrypt_asm(c, m, mlen, ad, adlen, npub, k); /* only the low byte of mlen/adlen reaches the assembly */ + + *clen = mlen + CRYPTO_ABYTES; + return 0; +} + + + +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k + ) +{ + /* + ... + ... the code for the AEAD implementation goes here, + ... generating a plaintext m[0],m[1],...,m[*mlen-1] + ... and secret message number nsec[0],nsec[1],... + ... from a ciphertext c[0],c[1],...,c[clen-1] + ... and associated data ad[0],ad[1],...,ad[adlen-1] + ... and nonce number npub[0],npub[1],... + ... and secret key k[0],k[1],... + ... + ... 
return 0; + */ + unsigned long long mlen_; + char tag_is_match; + + (void)nsec; + if (clen < CRYPTO_ABYTES) { + return -1; + } + mlen_ = clen - CRYPTO_ABYTES; + + tag_is_match = crypto_aead_decrypt_asm(m, c, mlen_, ad, adlen, npub, k); + + if (tag_is_match != 0) + { + memset(m, 0, (size_t)mlen_); + return -1; + } + *mlen = mlen_; + return 0; +} \ No newline at end of file diff --git a/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/avr8_speed/encrypt_core.S b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/avr8_speed/encrypt_core.S new file mode 100644 index 0000000..d9ebcc0 --- /dev/null +++ b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/avr8_speed/encrypt_core.S @@ -0,0 +1,733 @@ +; +; ********************************************** +; * PHOTON-Beetle * +; * Authenticated Encryption and Hash Family * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.0 2020 by PHOTON-Beetle Team * +; ********************************************** +; +#include "api.h" + +#define CRYPTO_AEAD + +#define STATE_INBITS 256 +#define STATE_INBYTES ((STATE_INBITS + 7) / 8) +#define RATE_INBITS 128 +#define RATE_INBYTES ((RATE_INBITS + 7) / 8) +#define RATE_INBYTES_MASK (RATE_INBYTES - 1) +#define TAG_MATCH 0 +#define TAG_UNMATCH -1 +#define OTHER_FAILURES -2 + + +; For CRYPTO_HASH +#define CRYPTO_BYTES 32 +#define INITIAL_RATE_INBITS 128 +#define INITIAL_RATE_INBYTES ((INITIAL_RATE_INBITS + 7) / 8) +#define HASH_RATE_INBITS 32 +#define HASH_RATE_INBYTES ((HASH_RATE_INBITS + 7) / 8) +#define HASH_RATE_INBYTES_MASK (HASH_RATE_INBYTES - 1) + +; +; ============================================ +; R E G I S T E R D E F I N I T I O N S +; ============================================ +; +; [Add all register names here, include info on +; all used registers without specific names] +; rmp: Multipurpose register +#define rmp r16 +#define rate r17 +#define mclen r18 +#define radlen r19 +#define adlen_org r0 + +#define cnt0 r20 +#define cnt1 r21 +#define cnt2 r22 + +#define SQUEEZE_RATE_INBITS 128 +#define SQUEEZE_RATE_INBYTES ((SQUEEZE_RATE_INBITS + 7) / 8) +#define SQUEEZE_RATE_MASK (SQUEEZE_RATE_INBYTES - 1) + +#define CAPACITY_INBITS (STATE_INBITS - RATE_INBITS) +#define CAPACITY_INBYTES ((CAPACITY_INBITS + 7) / 8) + +; For CRYPTO_AEAD +#define KEY_INBITS (CRYPTO_KEYBYTES * 8) +#define KEY_INBYTES (CRYPTO_KEYBYTES) + +#define NONCE_INBITS (CRYPTO_NPUBBYTES * 8) +#define NONCE_INBYTES (CRYPTO_NPUBBYTES) + +#define TAG_INBITS (CRYPTO_ABYTES * 8) +#define TAG_INBYTES (CRYPTO_ABYTES) + +#define t0 r8 +#define t1 r9 +#define t2 r10 +#define t3 r11 + +#define x0 r12 +#define x1 r13 +#define x2 r14 +#define x3 r15 + +#define ed r1 + +#define addr0 r2 +#define addr1 r3 +#define addr2 r4 +#define addr3 r5 +#define addr4 r6 +#define addr5 r7 + +; domain_cnt overlap with cnt0, only temporarily used, no need to back up +#define domain_cnt r20 +#define domain_cnt0 r23 +#define domain_cnt1 r24 + +.section .noinit + SRAM_STATE: .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + SRAM_MESSAGE_OUT_ADDR: .BYTE 0, 0 + SRAM_MESSAGE_IN_ADDR: .BYTE 0, 0 + SRAM_MESSAGE_IN_LEN: .BYTE 0, 0 +#ifdef CRYPTO_AEAD +; For CRYPTO_AEAD + SRAM_ASSOCIATED_DATA_ADDR: .BYTE 0, 0 + SRAM_ADLEN: .BYTE 0, 0 + SRAM_NONCE_ADDR: .BYTE 0, 0 + SRAM_KEY_ADDR: .BYTE 0, 0 + SRAM_ADDITIONAL: .BYTE 0, 0, 0, 0 + .BYTE 0, 0, 0, 0 + .BYTE 0, 0, 0, 0 + .BYTE 0, 0, 0, 0 +#endif + +; SRAM required additionally, 
besides those used for API + SRAM_PAD: .BYTE 0, 0, 0, 0 +#if ((defined(CRYPTO_AEAD) && (RATE_INBYTES > 4)) || defined(CRYPTO_HASH)) + .BYTE 0, 0, 0, 0 + .BYTE 0, 0, 0, 0 + .BYTE 0, 0, 0, 0 +#endif + +.section .text + +#include "assist.h" +#include "photon.h" + +AddDomainCounter: + ldi YH, hi8(SRAM_STATE + STATE_INBYTES - 3) + ldi YL, lo8(SRAM_STATE + STATE_INBYTES - 3) + ldi rmp, 0x80 + ldi cnt1, 3 +check_domain_bit: + ror domain_cnt + brcc no_xor + ld x0, Y + eor x0, rmp + st Y, x0 +no_xor: + adiw YL, 1 + dec cnt1 + brne check_domain_bit +ret + +; require XH:XL be the address of the current associated data/message block +XOR_to_State: + ldi YH, hi8(SRAM_STATE) + ldi YL, lo8(SRAM_STATE) + mov cnt0, rate + dec cnt0 +XOR_to_State_loop: + rcall Load_Reorder_32_bits + ld rmp, Y + eor rmp, x0 + st Y+, rmp + ld rmp, Y + eor rmp, x1 + st Y+, rmp + ld rmp, Y + eor rmp, x2 + st Y+, rmp + ld rmp, Y + eor rmp, x3 + st Y+, rmp + subi cnt0, 4 + brsh XOR_to_State_loop +; XH:XL are now the address of the next associated data/message block if this is not the last block +ret + +; require XH:XL pointed to the source data to be padded +PAD_OneZero: + ldi YH, hi8(SRAM_PAD) + ldi YL, lo8(SRAM_PAD) + mov cnt1, rate +pad_copy: + ld rmp, X+ + st Y+, rmp + dec cnt1 + dec cnt0 + brne pad_copy +pad_one: + ldi rmp, 1 + st Y+, rmp + dec cnt1 + breq pad_end + clr rmp +pad_zero: + st Y+, rmp + dec cnt1 + brne pad_zero +pad_end: + ldi XH, hi8(SRAM_PAD) + ldi XL, lo8(SRAM_PAD) +; XH:XL are now pointed to last block needed to be processed +ret + +HASH: + movw addr0, XL +hash_block_loop: + rcall PHOTON_Permutation + movw XL, addr0 + cp rate, radlen + brsh hash_last_block + rcall XOR_to_State + movw addr0, XL + sub radlen, rate + rjmp hash_block_loop + +hash_last_block: + cp radlen, rate + breq hash_xor_domain + mov cnt0, radlen + rcall PAD_OneZero + +hash_xor_domain: + clr radlen + rcall XOR_to_State + mov domain_cnt, domain_cnt0 + rcall AddDomainCounter +ret + +TAG: + rcall PHOTON_Permutation + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + movw YL, addr2 + rcall Load_invReorder_Store_128_bits +ret + +#ifdef CRYPTO_AEAD +.IF (RATE_INBITS == 128) +XOR_to_Cipher: + mov t2, rate + cp t2, mclen + brlo XOR_to_Cipher_Start + mov t2, mclen ; backup the real length of the remaining message + +XOR_to_Cipher_Start: + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + ldi YH, hi8(SRAM_ADDITIONAL) + ldi YL, lo8(SRAM_ADDITIONAL) + rcall Load_invReorder_Store_128_bits ; State move to additional SRAM and reorder + + movw XL, addr0 + movw ZL, addr2 + + ; XOR Part 2 + sbiw YL, (RATE_INBYTES>>1) ; Pointed to Part 2 + ldi cnt0, (RATE_INBYTES>>1) + cp cnt0, mclen + brlo XOR_Part2_Store_Cipher_begin + mov cnt0, mclen +XOR_Part2_Store_Cipher_begin: + sub mclen, cnt0 +XOR_Part2_Store_Cipher_loop: + ld t0, Y+ + ld x0, X+ + eor x0, t0 + st Z+, x0 + dec cnt0 + brne XOR_Part2_Store_Cipher_loop + + cpi mclen, 1 + brlo XOR_to_Cipher_END + + ; XOR (Part 1 >>> 1) + ldi cnt0, (RATE_INBYTES>>1) + cp cnt0, mclen + brlo XOR_Part1_Store_Cipher_begin + mov cnt0, mclen +XOR_Part1_Store_Cipher_begin: + sub mclen, cnt0 + ldi YH, hi8(SRAM_ADDITIONAL) + ldi YL, lo8(SRAM_ADDITIONAL) + ld t0, Y + bst t0, 0 + adiw YL, (RATE_INBYTES>>1)-1 + ld t0, Y + ror t0 + bld t0, 7 + st Y, t0 + ldi cnt1, (RATE_INBYTES>>1)-1 +ROR_part1_loop: + ld t0, -Y + ror t0 + st Y, t0 + dec cnt1 + brne ROR_part1_loop + +XOR_Part1_Store_Cipher_loop: + ld t0, Y+ + ld x0, X+ + eor x0, t0 + st Z+, x0 + dec cnt0 + brne XOR_Part1_Store_Cipher_loop + +XOR_to_Cipher_END: + tst ed + brne 
XOR_to_Cipher_dec + +XOR_to_Cipher_enc: + movw XL, addr0 + cp t2, rate + brsh XOR_to_Cipher_XOR_to_State + mov cnt0, t2 + rcall PAD_OneZero + rjmp XOR_to_Cipher_XOR_to_State + +XOR_to_Cipher_dec: + movw XL, addr2 + cp t2, rate + brsh XOR_to_Cipher_XOR_to_State + ; need to be padded + mov cnt0, t2 + rcall PAD_OneZero + +XOR_to_Cipher_XOR_to_State: + rcall XOR_to_State + + clr rmp + add addr0, t2 + adc addr1, rmp + add addr2, t2 + adc addr3, rmp +ret +.ELSE +; RATE_INBITS == 32 +XOR_to_Cipher: + mov t2, rate + cp t2, mclen + brlo XOR_to_Cipher_Start + mov t2, mclen ; backup the real length of the remaining message + +XOR_to_Cipher_Start: + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + ld x0, X+ + ld x1, X+ + ld x2, X+ + ld x3, X+ + ldi YH, hi8(SRAM_ADDITIONAL) + ldi YL, lo8(SRAM_ADDITIONAL) + rcall invReorder_Store_32_bits + + movw XL, addr0 + movw ZL, addr2 + + ; XOR Part 2 + sbiw YL, (RATE_INBYTES>>1) ; Pointed to Part 2 + ldi cnt0, (RATE_INBYTES>>1) + cp cnt0, mclen + brlo XOR_Part2_Store_Cipher_begin + mov cnt0, mclen +XOR_Part2_Store_Cipher_begin: + sub mclen, cnt0 +XOR_Part2_Store_Cipher_loop: + ld t0, Y+ + ld x0, X+ + eor x0, t0 + st Z+, x0 + dec cnt0 + brne XOR_Part2_Store_Cipher_loop + + cpi mclen, 1 + brlo XOR_to_Cipher_END + + ; XOR (Part 1 >>> 1) + ldi cnt0, (RATE_INBYTES>>1) + cp cnt0, mclen + brlo XOR_Part1_Store_Cipher_begin + mov cnt0, mclen +XOR_Part1_Store_Cipher_begin: + sub mclen, cnt0 + ldi YH, hi8(SRAM_ADDITIONAL) + ldi YL, lo8(SRAM_ADDITIONAL) + ld t0, Y+ + ld t1, Y+ + bst t0, 0 + ror t1 + ror t0 + bld t1, 7 + + ld x0, X+ + eor x0, t0 + st Z+, x0 + dec cnt0 + breq XOR_to_Cipher_END + ld x0, X+ + eor x0, t1 + st Z+, x0 + +XOR_to_Cipher_END: + tst ed + brne XOR_to_Cipher_dec + +XOR_to_Cipher_enc: + movw XL, addr0 + cp t2, rate + brsh XOR_to_Cipher_XOR_to_State + mov cnt0, t2 + rcall PAD_OneZero + rjmp XOR_to_Cipher_XOR_to_State + +XOR_to_Cipher_dec: + movw XL, addr2 + cp t2, rate + brsh XOR_to_Cipher_XOR_to_State + ; need to be padded + mov cnt0, t2 + rcall PAD_OneZero + +XOR_to_Cipher_XOR_to_State: + rcall XOR_to_State + + clr rmp + add addr0, t2 + adc addr1, rmp + add addr2, t2 + adc addr3, rmp +ret +.ENDIF + +ENC: + lds ZH, SRAM_MESSAGE_OUT_ADDR + lds ZL, SRAM_MESSAGE_OUT_ADDR + 1 + lds XH, SRAM_MESSAGE_IN_ADDR + lds XL, SRAM_MESSAGE_IN_ADDR + 1 + + movw addr0, XL + movw addr2, ZL + +enc_block_loop: + rcall PHOTON_Permutation + rcall XOR_to_Cipher + cpi mclen, 1 + brsh enc_block_loop + + mov domain_cnt, domain_cnt1 + rcall AddDomainCounter +ret + +AUTH_AND_ENCDEC: + ldi YH, hi8(SRAM_STATE) + ldi YL, lo8(SRAM_STATE) + + lds XH, SRAM_NONCE_ADDR + lds XL, SRAM_NONCE_ADDR + 1 + rcall Load_Reorder_Store_128_bits + lds XH, SRAM_KEY_ADDR + lds XL, SRAM_KEY_ADDR + 1 + rcall Load_Reorder_Store_128_bits + + ldi domain_cnt0, 1 + ldi domain_cnt1, 1 + +test_adlen_zero: + tst radlen + breq adlen_zero_test_mlen_zero + + ; radlen != 0 +adlen_nzero_test_mlen_zero: + tst mclen + brne test_adlen_divisible + ldi domain_cnt0, 3 +test_adlen_divisible: + mov rmp, radlen + andi rmp, RATE_INBYTES_MASK + breq hash_ad + inc domain_cnt0 ; 2 or 4 +hash_ad: + lds XH, SRAM_ASSOCIATED_DATA_ADDR + lds XL, SRAM_ASSOCIATED_DATA_ADDR + 1 + rcall HASH + tst mclen + breq mlen_zero_inputout_address + rjmp test_mlen_divisible + +adlen_zero_test_mlen_zero: + ldi domain_cnt1, 5 + tst mclen + breq adlen_zero_mlen_zero + + ; mclen != 0 +test_mlen_divisible: + mov rmp, mclen + andi rmp, RATE_INBYTES_MASK + breq enc_dec_m + inc domain_cnt1 ; 2 or 6 +enc_dec_m: + rcall ENC + rjmp AUTH_AND_ENCDEC_end + 
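+; ----------------------------------------------------------------------
+; Editor's sketch, not part of the original source: the domain-constant
+; selection implemented by AUTH_AND_ENCDEC above, restated as plain C.
+; The names c0, c1, ad_len, m_len and r are illustrative; r is
+; RATE_INBYTES (16 in this build) and the %-checks correspond to the
+; andi/RATE_INBYTES_MASK tests:
+;
+;     uint8_t c0 = 1, c1 = 1;              /* domain_cnt0, domain_cnt1 */
+;     if (ad_len != 0) {
+;         if (m_len == 0)       c0 = 3;
+;         if (ad_len % r != 0)  c0 += 1;   /* -> 2 or 4 */
+;     } else {
+;         c1 = 5;
+;     }
+;     if (m_len != 0 && m_len % r != 0) c1 += 1;   /* -> 2 or 6 */
+;
+; AddDomainCounter then folds the three low bits of the chosen constant
+; into the state, XORing 0x80 into one of the last three state bytes per
+; set bit (the placement reflects the bitsliced state layout).
+; ----------------------------------------------------------------------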
+adlen_zero_mlen_zero: + ; empty message and empty associated data + ldi YH, hi8(SRAM_STATE + STATE_INBYTES - 3) + ldi YL, lo8(SRAM_STATE + STATE_INBYTES - 3) + ld x0, Y + ldi rmp, 0x80 + eor x0, rmp + st Y, x0 + +mlen_zero_inputout_address: + tst ed + brne dec_inputout_address +enc_inputout_address: + lds ZH, SRAM_MESSAGE_OUT_ADDR + lds ZL, SRAM_MESSAGE_OUT_ADDR + 1 + movw addr2, ZL + rjmp AUTH_AND_ENCDEC_end +dec_inputout_address: + lds ZH, SRAM_MESSAGE_IN_ADDR + lds ZL, SRAM_MESSAGE_IN_ADDR + 1 + movw addr0, ZL + +AUTH_AND_ENCDEC_end: +ret + + +; void crypto_aead_encrypt_asm( +; unsigned char *c, +; const unsigned char *m, +; unsigned long long mlen, +; const unsigned char *ad, +; unsigned long long radlen, +; const unsigned char *npub, +; const unsigned char *k +; ) +; +; unsigned char *c, is passed in r24:r25 +; const unsigned char *m, is passed in r22:r23 +; unsigned long long mlen, is passed in r20:r21, only LSB (r20) is used +; const unsigned char *ad, is passed in r18:r19 +; unsigned long long radlen, is passed in r16:r17, only LSB (r16) is used +; const unsigned char *npub, is passed in r14:r15 +; const unsigned char *k is passed in r12:r13 +.global crypto_aead_encrypt_asm +crypto_aead_encrypt_asm: + PUSH_ALL + ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR) + ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR) + st X+, r25 ;store cipher address in SRAM_MESSAGE_OUT_ADDR + st X+, r24 + st X+, r23 ;store message address in SRAM_MESSAGE_IN_ADDR + st X+, r22 + st X+, r21 ;store message length in SRAM_MESSAGE_IN_LEN + st X+, r20 + st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR + st X+, r18 + st X+, r17 ;store associated data length in SRAM_ADLEN + st X+, r16 + st X+, r15 ;store nonce address in SRAM_NONCE_ADDR + st X+, r14 + st X+, r13 ;store key address in SRAM_KEY_ADDR + st X+, r12 + mov mclen, r20 + mov radlen, r16 + + ldi rate, RATE_INBYTES + clr ed + + rcall AUTH_AND_ENCDEC + rcall TAG + + POP_ALL +ret + +; int crypto_aead_decrypt_asm( +; unsigned char *m, +; const unsigned char *c, +; unsigned long long clen, +; const unsigned char *ad, +; unsigned long long radlen, +; const unsigned char *npub, +; const unsigned char *k +; ) +; +; unsigned char *m, is passed in r24:r25 +; const unsigned char *c, is passed in r22:r23 +; unsigned long long clen, is passed in r20:r21, only LSB (r20) is used +; const unsigned char *ad, is passed in r18:r19 +; unsigned long long radlen, is passed in r16:r17, only LSB (r16) is used +; const unsigned char *npub, is passed in r14:r15 +; const unsigned char *k is passed in r12:r13 +.global crypto_aead_decrypt_asm +crypto_aead_decrypt_asm: + PUSH_ALL + ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR) + ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR) + st X+, r25 ;store message address in SRAM_MESSAGE_OUT_ADDR + st X+, r24 + st X+, r23 ;store cipher address in SRAM_MESSAGE_IN_ADDR + st X+, r22 + st X+, r21 ;store cipher length in SRAM_MESSAGE_IN_LEN + st X+, r20 + st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR + st X+, r18 + st X+, r17 ;store associated data length in SRAM_ADLEN + st X+, r16 + st X+, r15 ;store nonce address in SRAM_NONCE_ADDR + st X+, r14 + st X+, r13 ;store key address in SRAM_KEY_ADDR + st X+, r12 + mov mclen, r20 + mov radlen, r16 + + ldi rate, RATE_INBYTES + clr ed + inc ed + + rcall AUTH_AND_ENCDEC + + ldi YH, hi8(SRAM_ADDITIONAL) + ldi YL, lo8(SRAM_ADDITIONAL) + movw addr2, YL + rcall TAG + + sbiw YL, CRYPTO_ABYTES + movw XL, addr0 + + ldi cnt0, CRYPTO_ABYTES +compare_tag: + ld t0, Y+ + ld x0, X+ + cp t0, x0 + brne return_tag_not_match + 
dec cnt0 + brne compare_tag + rjmp return_tag_match + +return_tag_not_match: + ldi r25, 0xFF + ldi r24, 0xFF + rjmp crypto_aead_decrypt_end +return_tag_match: + clr r25 + clr r24 + +crypto_aead_decrypt_end: + POP_ALL +ret + +; #ifdef CRYPTO_AEAD +#endif + +#ifdef CRYPTO_HASH +; void crypto_hash_asm( +; unsigned char *out, +; const unsigned char *in, +; unsigned long long inlen +; ) +; +; unsigned char *out, is passed in r24:r25 +; const unsigned char *in, is passed in r22:r23 +; unsigned long long inlen, is passed in r20:r21, only LSB (r20) is used +.global crypto_hash_asm +crypto_hash_asm: + PUSH_ALL + ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR) + ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR) + st X+, r25 ;store message address in SRAM_MESSAGE_OUT_ADDR + st X+, r24 + st X+, r23 ;store cipher address in SRAM_MESSAGE_IN_ADDR + st X+, r22 + st X+, r21 ;store cipher length in SRAM_MESSAGE_IN_LEN + st X+, r20 + mov mclen, r20 + + ; empty half state + ldi YH, hi8(SRAM_STATE + INITIAL_RATE_INBYTES) + ldi YL, lo8(SRAM_STATE + INITIAL_RATE_INBYTES) + clr rmp + ldi cnt1, (STATE_INBYTES - INITIAL_RATE_INBYTES) +zero_state: + st Y+, rmp + dec cnt1 + brne zero_state + + ldi domain_cnt0, 1 + sbiw YL, (STATE_INBYTES - INITIAL_RATE_INBYTES) + lds XH, SRAM_MESSAGE_IN_ADDR + lds XL, SRAM_MESSAGE_IN_ADDR + 1 + + tst mclen + breq add_domain + +test_mlen_initrate: + ; mclen != 0 + cpi mclen, INITIAL_RATE_INBYTES + brlo less_than_initial_rate + breq equal_to_initial_rate + +more_than_initial_rate: + rcall Load_Reorder_Store_128_bits + ldi rate, HASH_RATE_INBYTES + mov radlen, mclen + subi radlen, INITIAL_RATE_INBYTES + mov rmp, radlen + andi rmp, HASH_RATE_INBYTES_MASK + breq hash_message + inc domain_cnt0 +hash_message: + rcall HASH + rjmp gen_digest + +equal_to_initial_rate: + inc domain_cnt0 + rcall Load_Reorder_Store_128_bits + rjmp add_domain + +less_than_initial_rate: + mov cnt0, mclen + ldi rate, INITIAL_RATE_INBYTES + rcall PAD_OneZero + ldi YH, hi8(SRAM_STATE) + ldi YL, lo8(SRAM_STATE) + rcall Load_Reorder_Store_128_bits + rjmp add_domain + +add_domain: + mov domain_cnt, domain_cnt0 + rcall AddDomainCounter +gen_digest: + lds XH, SRAM_MESSAGE_OUT_ADDR + lds XL, SRAM_MESSAGE_OUT_ADDR + 1 + movw addr2, XL + rcall TAG + movw XL, addr2 + adiw XL, SQUEEZE_RATE_INBYTES + movw addr2, XL + rcall TAG + + POP_ALL +ret + +#endif \ No newline at end of file diff --git a/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/avr8_speed/photon.h b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/avr8_speed/photon.h new file mode 100644 index 0000000..d764e16 --- /dev/null +++ b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/avr8_speed/photon.h @@ -0,0 +1,714 @@ +; +; ********************************************** +; * PHOTON-Beetle * +; * Authenticated Encryption and Hash Family * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.1 2020 by PHOTON-Beetle Team * +; ********************************************** +; +#define ROUND_N 12 +#define DIM 8 + +.MACRO Store_OneRow + st X+, x0 + st X+, x1 + st X+, x2 + st X+, x3 +.ENDM + +.MACRO ROTL_1 i0 + bst \i0, 7 + lsl \i0 + bld \i0, 0 +.ENDM + +.MACRO ROTR_1 i0 + bst \i0, 0 + lsr \i0 + bld \i0, 7 +.ENDM + +.MACRO ROTR_4 i0 + swap \i0 +.ENDM + +.MACRO ROTR_1_ROW + ROTR_1 x0 + ROTR_1 x1 + ROTR_1 x2 + ROTR_1 x3 +.ENDM + +.MACRO ROTL_1_ROW + ROTL_1 x0 + ROTL_1 x1 + ROTL_1 x2 + ROTL_1 x3 +.ENDM + +.MACRO ROTR_4_ROW + ROTR_4 x0 + ROTR_4 x1 + ROTR_4 x2 + ROTR_4 x3 +.ENDM + + +; For all mul2_GF16_0x13_xor: +; Input +; 
MSB........LSB +; x0=@0: x1=@1: x2=@2: x3=@3 + ; # define mul2_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \ + ; x3 = XOR (x3 ,x0); \ + ; } while (0) ; /* Output : ( MSB ) x1 ,x2 ,x3 , x0 ( LSB ) */ +.MACRO mul2_GF16_0x13_xor + ld t3, X+ + ld t2, X+ + ld t1, X+ + ld t0, X+ + eor t3, t0 + eor x0, t0 + eor x1, t3 + eor x2, t2 + eor x3, t1 +.ENDM + + ; # define mul4_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \ + ; x3 = XOR (x3 ,x0); x0 = XOR (x0 ,x1); \ + ; } while (0) ; /* Output : ( MSB ) x2 ,x3 ,x0 , x1 ( LSB ) */ +.MACRO mul4_GF16_0x13_xor + ld t3, X+ + ld t2, X+ + ld t1, X+ + ld t0, X+ + eor t3, t0 + eor t0, t1 + eor x0, t1 + eor x1, t0 + eor x2, t3 + eor x3, t2 +.ENDM + + ; # define mul5_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \ + ; x2 = XOR (x2 ,x0); x3 = XOR (x3 ,x1); \ + ; x1 = XOR (x1 ,x2); x0 = XOR (x0 ,x3); \ + ; } while (0) ; /* Output : ( MSB ) x2 ,x0 ,x1 , x3 ( LSB ) */ +.MACRO mul5_GF16_0x13_xor + ld t3, X+ + ld t2, X+ + ld t1, X+ + ld t0, X+ + eor t2, t0 + eor t3, t1 + eor t1, t2 + eor t0, t3 + eor x0, t3 + eor x1, t1 + eor x2, t0 + eor x3, t2 +.ENDM + + ; # define mul6_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \ + ; x3 = XOR (x3 ,x1); x1 = XOR (x1 ,x0); \ + ; x2 = XOR (x2 ,x1); x0 = XOR (x0 ,x2); \ + ; x2 = XOR (x2 ,x3); \ + ; } while (0) ; /* Output : ( MSB ) x0 ,x2 ,x3 , x1 ( LSB ) */ +.MACRO mul6_GF16_0x13_xor + ld t3, X+ + ld t2, X+ + ld t1, X+ + ld t0, X+ + eor t3, t1 + eor t1, t0 + eor t2, t1 + eor t0, t2 + eor t2, t3 + eor x0, t1 + eor x1, t3 + eor x2, t2 + eor x3, t0 +.ENDM + + ; # define mul8_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \ + ; x3 = XOR (x3 ,x0); x0 = XOR (x0 ,x1); \ + ; x1 = XOR (x1 ,x2); \ + ; } while (0) ; /* Output : ( MSB ) x3 ,x0 ,x1 , x2 ( LSB ) */ +.MACRO mul8_GF16_0x13_xor + ld t3, X+ + ld t2, X+ + ld t1, X+ + ld t0, X+ + eor t3, t0 + eor t0, t1 + eor t1, t2 + eor x0, t2 + eor x1, t1 + eor x2, t0 + eor x3, t3 +.ENDM + + ; # define mul11_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \ + ; x2 = XOR (x2 ,x0); x1 = XOR (x1 ,x3); \ + ; x0 = XOR (x0 ,x1); x3 = XOR (x3 ,x2); \ + ; } while (0) ; /* Output : ( MSB ) x1 ,x2 ,x0 , x3 ( LSB ) */ +.MACRO mulb_GF16_0x13_xor + ld t3, X+ + ld t2, X+ + ld t1, X+ + ld t0, X+ + eor t2, t0 + eor t1, t3 + eor t0, t1 + eor t3, t2 + eor x0, t3 + eor x1, t0 + eor x2, t2 + eor x3, t1 +.ENDM + + +.MACRO RoundFunction +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Start AddRC_Sbox_ShiftRow +AddRC_Sbox_ShiftRow_Start: + clr t3 + inc t3 + + ldi XL, lo8(SRAM_STATE) + ldi XH, hi8(SRAM_STATE) + + lpm t0, Z+ ; Load two nibbles of round constant for row 0, 1 + ; AddRC_TwoRows + ld x0, X+ + ld x1, X+ + ld x2, X+ + ld x3, X+ + sbiw XL, 4 + + ror t0 + brcc row0_next1 + eor x0, t3 +row0_next1: + ror t0 + brcc row0_next2 + eor x1, t3 +row0_next2: + ror t0 + brcc row0_next3 + eor x2, t3 +row0_next3: + ror t0 + brcc row0_next4 + eor x3, t3 +row0_next4: + ; Sbox_TwoRows + eor x1, x2 + mov t1, x2 + and t1, x1 + eor x3, t1 + mov t1, x3 + and x3, x1 + eor x3, x2 + mov t2, x3 + eor x3, x0 + com x3 + mov x2, x3 + or t2, x0 + eor x0, t1 + eor x1, x0 + or x2, x1 + eor x2, t1 + eor x1, t2 + eor x3, x1 + Store_OneRow + + ; AddRC_TwoRows + ld x0, X+ + ld x1, X+ + ld x2, X+ + ld x3, X+ + sbiw XL, 4 + + ror t0 + brcc row1_next1 + eor x0, t3 +row1_next1: + ror t0 + brcc row1_next2 + eor x1, t3 +row1_next2: + ror t0 + brcc row1_next3 + eor x2, t3 +row1_next3: + ror t0 + brcc row1_next4 + eor x3, t3 +row1_next4: + ; Sbox_TwoRows + eor x1, x2 + mov t1, x2 + and t1, x1 + eor x3, t1 + mov t1, x3 + and x3, x1 + eor x3, x2 + mov t2, x3 + eor x3, x0 + com x3 + mov x2, x3 + or t2, x0 + eor x0, t1 + eor x1, x0 + or x2, x1 + eor x2, t1 
+ eor x1, t2 + eor x3, x1 + + ROTR_1_ROW + Store_OneRow + + lpm t0, Z+ ; Load two nibbles of round constant for row 2i, 2i+1 + ; AddRC_TwoRows + ld x0, X+ + ld x1, X+ + ld x2, X+ + ld x3, X+ + sbiw XL, 4 + + ror t0 + brcc row2_next1 + eor x0, t3 +row2_next1: + ror t0 + brcc row2_next2 + eor x1, t3 +row2_next2: + ror t0 + brcc row2_next3 + eor x2, t3 +row2_next3: + ror t0 + brcc row2_next4 + eor x3, t3 +row2_next4: + ; Sbox_TwoRows + eor x1, x2 + mov t1, x2 + and t1, x1 + eor x3, t1 + mov t1, x3 + and x3, x1 + eor x3, x2 + mov t2, x3 + eor x3, x0 + com x3 + mov x2, x3 + or t2, x0 + eor x0, t1 + eor x1, x0 + or x2, x1 + eor x2, t1 + eor x1, t2 + eor x3, x1 + + ROTR_1_ROW + ROTR_1_ROW + Store_OneRow + + ; AddRC_TwoRows + ld x0, X+ + ld x1, X+ + ld x2, X+ + ld x3, X+ + sbiw XL, 4 + + ror t0 + brcc row3_next1 + eor x0, t3 +row3_next1: + ror t0 + brcc row3_next2 + eor x1, t3 +row3_next2: + ror t0 + brcc row3_next3 + eor x2, t3 +row3_next3: + ror t0 + brcc row3_next4 + eor x3, t3 +row3_next4: + ; Sbox_TwoRows + eor x1, x2 + mov t1, x2 + and t1, x1 + eor x3, t1 + mov t1, x3 + and x3, x1 + eor x3, x2 + mov t2, x3 + eor x3, x0 + com x3 + mov x2, x3 + or t2, x0 + eor x0, t1 + eor x1, x0 + or x2, x1 + eor x2, t1 + eor x1, t2 + eor x3, x1 + + ROTR_4_ROW + ROTL_1_ROW + Store_OneRow + + lpm t0, Z+ ; Load two nibbles of round constant for row 2i, 2i+1 + ; AddRC_TwoRows + ld x0, X+ + ld x1, X+ + ld x2, X+ + ld x3, X+ + sbiw XL, 4 + + ror t0 + brcc row4_next1 + eor x0, t3 +row4_next1: + ror t0 + brcc row4_next2 + eor x1, t3 +row4_next2: + ror t0 + brcc row4_next3 + eor x2, t3 +row4_next3: + ror t0 + brcc row4_next4 + eor x3, t3 +row4_next4: + ; Sbox_TwoRows + eor x1, x2 + mov t1, x2 + and t1, x1 + eor x3, t1 + mov t1, x3 + and x3, x1 + eor x3, x2 + mov t2, x3 + eor x3, x0 + com x3 + mov x2, x3 + or t2, x0 + eor x0, t1 + eor x1, x0 + or x2, x1 + eor x2, t1 + eor x1, t2 + eor x3, x1 + + ROTR_4_ROW + Store_OneRow + + ; AddRC_TwoRows + ld x0, X+ + ld x1, X+ + ld x2, X+ + ld x3, X+ + sbiw XL, 4 + + ror t0 + brcc row5_next1 + eor x0, t3 +row5_next1: + ror t0 + brcc row5_next2 + eor x1, t3 +row5_next2: + ror t0 + brcc row5_next3 + eor x2, t3 +row5_next3: + ror t0 + brcc row5_next4 + eor x3, t3 +row5_next4: + ; Sbox_TwoRows + eor x1, x2 + mov t1, x2 + and t1, x1 + eor x3, t1 + mov t1, x3 + and x3, x1 + eor x3, x2 + mov t2, x3 + eor x3, x0 + com x3 + mov x2, x3 + or t2, x0 + eor x0, t1 + eor x1, x0 + or x2, x1 + eor x2, t1 + eor x1, t2 + eor x3, x1 + + ROTR_4_ROW + ROTR_1_ROW + Store_OneRow + + lpm t0, Z+ ; Load two nibbles of round constant for row 2i, 2i+1 + ; AddRC_TwoRows + ld x0, X+ + ld x1, X+ + ld x2, X+ + ld x3, X+ + sbiw XL, 4 + + ror t0 + brcc row6_next1 + eor x0, t3 +row6_next1: + ror t0 + brcc row6_next2 + eor x1, t3 +row6_next2: + ror t0 + brcc row6_next3 + eor x2, t3 +row6_next3: + ror t0 + brcc row6_next4 + eor x3, t3 +row6_next4: + ; Sbox_TwoRows + eor x1, x2 + mov t1, x2 + and t1, x1 + eor x3, t1 + mov t1, x3 + and x3, x1 + eor x3, x2 + mov t2, x3 + eor x3, x0 + com x3 + mov x2, x3 + or t2, x0 + eor x0, t1 + eor x1, x0 + or x2, x1 + eor x2, t1 + eor x1, t2 + eor x3, x1 + + ROTL_1_ROW + ROTL_1_ROW + Store_OneRow + + ; AddRC_TwoRows + ld x0, X+ + ld x1, X+ + ld x2, X+ + ld x3, X+ + sbiw XL, 4 + + ror t0 + brcc row7_next1 + eor x0, t3 +row7_next1: + ror t0 + brcc row7_next2 + eor x1, t3 +row7_next2: + ror t0 + brcc row7_next3 + eor x2, t3 +row7_next3: + ror t0 + brcc row7_next4 + eor x3, t3 +row7_next4: + ; Sbox_TwoRows + eor x1, x2 + mov t1, x2 + and t1, x1 + eor x3, t1 + mov t1, x3 + and x3, x1 + eor 
x3, x2 + mov t2, x3 + eor x3, x0 + com x3 + mov x2, x3 + or t2, x0 + eor x0, t1 + eor x1, x0 + or x2, x1 + eor x2, t1 + eor x1, t2 + eor x3, x1 + + ROTL_1_ROW + Store_OneRow + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Start MixColumns +MC_Start: + + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + movw YL, XL +A0: + clr x0 + clr x1 + clr x2 + clr x3 + mul2_GF16_0x13_xor + mul4_GF16_0x13_xor + mul2_GF16_0x13_xor + mulb_GF16_0x13_xor + mul2_GF16_0x13_xor + mul8_GF16_0x13_xor + mul5_GF16_0x13_xor + mul6_GF16_0x13_xor + movw XL, YL + Store_OneRow + +A1: + clr x0 + clr x1 + clr x2 + clr x3 + mul2_GF16_0x13_xor + mul4_GF16_0x13_xor + mul2_GF16_0x13_xor + mulb_GF16_0x13_xor + mul2_GF16_0x13_xor + mul8_GF16_0x13_xor + mul5_GF16_0x13_xor + movw XL, YL + mul6_GF16_0x13_xor + Store_OneRow + +A2: + clr x0 + clr x1 + clr x2 + clr x3 + mul2_GF16_0x13_xor + mul4_GF16_0x13_xor + mul2_GF16_0x13_xor + mulb_GF16_0x13_xor + mul2_GF16_0x13_xor + mul8_GF16_0x13_xor + movw XL, YL + mul5_GF16_0x13_xor + mul6_GF16_0x13_xor + Store_OneRow + +A3: + clr x0 + clr x1 + clr x2 + clr x3 + mul2_GF16_0x13_xor + mul4_GF16_0x13_xor + mul2_GF16_0x13_xor + mulb_GF16_0x13_xor + mul2_GF16_0x13_xor + movw XL, YL + mul8_GF16_0x13_xor + mul5_GF16_0x13_xor + mul6_GF16_0x13_xor + Store_OneRow + +A4: + clr x0 + clr x1 + clr x2 + clr x3 + mul2_GF16_0x13_xor + mul4_GF16_0x13_xor + mul2_GF16_0x13_xor + mulb_GF16_0x13_xor + movw XL, YL + mul2_GF16_0x13_xor + mul8_GF16_0x13_xor + mul5_GF16_0x13_xor + mul6_GF16_0x13_xor + Store_OneRow + +A5: + clr x0 + clr x1 + clr x2 + clr x3 + mul2_GF16_0x13_xor + mul4_GF16_0x13_xor + mul2_GF16_0x13_xor + movw XL, YL + mulb_GF16_0x13_xor + mul2_GF16_0x13_xor + mul8_GF16_0x13_xor + mul5_GF16_0x13_xor + mul6_GF16_0x13_xor + Store_OneRow + +A6: + clr x0 + clr x1 + clr x2 + clr x3 + mul2_GF16_0x13_xor + mul4_GF16_0x13_xor + movw XL, YL + mul2_GF16_0x13_xor + mulb_GF16_0x13_xor + mul2_GF16_0x13_xor + mul8_GF16_0x13_xor + mul5_GF16_0x13_xor + mul6_GF16_0x13_xor + Store_OneRow + +A7: + clr x0 + clr x1 + clr x2 + clr x3 + mul2_GF16_0x13_xor + movw XL, YL + mul4_GF16_0x13_xor + mul2_GF16_0x13_xor + mulb_GF16_0x13_xor + mul2_GF16_0x13_xor + mul8_GF16_0x13_xor + mul5_GF16_0x13_xor + mul6_GF16_0x13_xor + Store_OneRow +.ENDM + +PHOTON_Permutation: + ldi ZH, hi8(RC) + ldi ZL, lo8(RC) + ldi cnt2, ROUND_N +round_loop_start: + RoundFunction + dec cnt2 + breq round_loop_end + jmp round_loop_start +round_loop_end: +ret + +.section .text +RC: +.byte 0x01,0x62,0xFE,0x9D +.byte 0x23,0x40,0xDC,0xBF +.byte 0x67,0x04,0x98,0xFB +.byte 0xFE,0x9D,0x01,0x62 +.byte 0xCD,0xAE,0x32,0x51 +.byte 0xAB,0xC8,0x54,0x37 +.byte 0x76,0x15,0x89,0xEA +.byte 0xDC,0xBF,0x23,0x40 +.byte 0x89,0xEA,0x76,0x15 +.byte 0x32,0x51,0xCD,0xAE +.byte 0x45,0x26,0xBA,0xD9 +.byte 0xBA,0xD9,0x45,0x26 diff --git a/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/avr8_speed/api.h b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/avr8_speed/api.h new file mode 100644 index 0000000..4ceda96 --- /dev/null +++ b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/avr8_speed/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 diff --git a/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/avr8_speed/assist.h b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/avr8_speed/assist.h new file mode 100644 index 0000000..fdf3c68 --- /dev/null +++ 
b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/avr8_speed/assist.h @@ -0,0 +1,140 @@ +; +; ********************************************** +; * PHOTON-Beetle * +; * Authenticated Encryption and Hash Family * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.0 2020 by PHOTON-Beetle Team * +; ********************************************** +; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Bitslice +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +.MACRO Reorder_8_bits i0, i1, i2, i3, i4 + ror \i0 + ror \i1 + ror \i0 + ror \i2 + ror \i0 + ror \i3 + ror \i0 + ror \i4 + ror \i0 + ror \i1 + ror \i0 + ror \i2 + ror \i0 + ror \i3 + ror \i0 + ror \i4 +.ENDM + +.MACRO InvReorder_8_bits i0, i1, i2, i3, i4 + ror \i1 + ror \i0 + ror \i2 + ror \i0 + ror \i3 + ror \i0 + ror \i4 + ror \i0 + ror \i1 + ror \i0 + ror \i2 + ror \i0 + ror \i3 + ror \i0 + ror \i4 + ror \i0 +.ENDM + +; require XH:XL be the address of the input +Load_Reorder_32_bits: + ldi cnt1, 4 +reorder_8_bits_loop: + ld rmp, X+ + Reorder_8_bits rmp, x0, x1, x2, x3 + dec cnt1 + brne reorder_8_bits_loop +ret + +; require YH:YL be the address of the output +invReorder_Store_32_bits: + ldi cnt1, 4 +invreorder_8_bits_loop: + InvReorder_8_bits rmp, x0, x1, x2, x3 + st Y+, rmp + dec cnt1 + brne invreorder_8_bits_loop +ret + +; require XH:XL be the address of the input +; require YH:YL be the address of the output +Load_Reorder_Store_128_bits: + ldi cnt0, 4 +reorder_32_bits_loop: + rcall Load_Reorder_32_bits + st Y+, x0 + st Y+, x1 + st Y+, x2 + st Y+, x3 + dec cnt0 + brne reorder_32_bits_loop +ret + +; require XH:XL be the address of the input +; require YH:YL be the address of the output +Load_invReorder_Store_128_bits: + ldi cnt0, 4 +invreorder_32_bits_loop: + ld x0, X+ + ld x1, X+ + ld x2, X+ + ld x3, X+ + rcall invReorder_Store_32_bits + dec cnt0 + brne invreorder_32_bits_loop +ret + +.macro PUSH_ALL +push r2 +push r3 +push r4 +push r5 +push r6 +push r7 +push r8 +push r9 +push r10 +push r11 +push r12 +push r13 +push r14 +push r15 +push r16 +push r17 +push r28 +push r29 +.endm + +.macro POP_ALL +pop r29 +pop r28 +pop r17 +pop r16 +pop r15 +pop r14 +pop r13 +pop r12 +pop r11 +pop r10 +pop r9 +pop r8 +pop r7 +pop r6 +pop r5 +pop r4 +pop r3 +pop r2 +clr r1 +.endm diff --git a/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/avr8_speed/crypto_aead.h b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/avr8_speed/crypto_aead.h new file mode 100644 index 0000000..cd820d3 --- /dev/null +++ b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/avr8_speed/crypto_aead.h @@ -0,0 +1,26 @@ +#ifdef __cplusplus +extern "C" { +#endif + +int crypto_aead_encrypt( + unsigned char *c,unsigned long long *clen, + const unsigned char *m,unsigned long long mlen, + const unsigned char *ad,unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k + ); + + +int crypto_aead_decrypt( + unsigned char *m,unsigned long long *outputmlen, + unsigned char *nsec, + const unsigned char *c,unsigned long long clen, + const unsigned char *ad,unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k + ); + +#ifdef __cplusplus +} +#endif diff --git a/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/avr8_speed/encrypt.c b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/avr8_speed/encrypt.c new file mode 100644 index 0000000..31bc9c4 --- /dev/null +++ 
b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/avr8_speed/encrypt.c @@ -0,0 +1,98 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> /* bracketed header names were lost in extraction; string.h (for memset) is the one this file strictly needs */ +#include "api.h" + +extern void crypto_aead_encrypt_asm( + unsigned char *c, + const unsigned char *m, + unsigned char mlen, + const unsigned char *ad, + unsigned char adlen, + const unsigned char *npub, + const unsigned char *k + ); + +extern char crypto_aead_decrypt_asm( + unsigned char *m, + const unsigned char *c, + unsigned char clen, + const unsigned char *ad, + unsigned char adlen, + const unsigned char *npub, + const unsigned char *k + ); + +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k + ) +{ + /* + ... + ... the code for the cipher implementation goes here, + ... generating a ciphertext c[0],c[1],...,c[*clen-1] + ... from a plaintext m[0],m[1],...,m[mlen-1] + ... and associated data ad[0],ad[1],...,ad[adlen-1] + ... and nonce npub[0],npub[1],... + ... and secret key k[0],k[1],... + ... the implementation shall not use nsec + ... + ... return 0; + */ + + (void)nsec; + + crypto_aead_encrypt_asm(c, m, mlen, ad, adlen, npub, k); /* only the low byte of mlen/adlen reaches the assembly */ + + *clen = mlen + CRYPTO_ABYTES; + return 0; +} + + + +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k + ) +{ + /* + ... + ... the code for the AEAD implementation goes here, + ... generating a plaintext m[0],m[1],...,m[*mlen-1] + ... and secret message number nsec[0],nsec[1],... + ... from a ciphertext c[0],c[1],...,c[clen-1] + ... and associated data ad[0],ad[1],...,ad[adlen-1] + ... and nonce number npub[0],npub[1],... + ... and secret key k[0],k[1],... + ... + ... 
return 0; + */ + unsigned long long mlen_; + char tag_is_match; + + (void)nsec; + if (clen < CRYPTO_ABYTES) { + return -1; + } + mlen_ = clen - CRYPTO_ABYTES; + + tag_is_match = crypto_aead_decrypt_asm(m, c, mlen_, ad, adlen, npub, k); + + if (tag_is_match != 0) + { + memset(m, 0, (size_t)mlen_); + return -1; + } + *mlen = mlen_; + return 0; +} \ No newline at end of file diff --git a/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/avr8_speed/encrypt_core.S b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/avr8_speed/encrypt_core.S new file mode 100644 index 0000000..e18201e --- /dev/null +++ b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/avr8_speed/encrypt_core.S @@ -0,0 +1,734 @@ +; +; ********************************************** +; * PHOTON-Beetle * +; * Authenticated Encryption and Hash Family * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.0 2020 by PHOTON-Beetle Team * +; ********************************************** +; +#include "api.h" + +#define CRYPTO_AEAD + +#define STATE_INBITS 256 +#define STATE_INBYTES ((STATE_INBITS + 7) / 8) +#define RATE_INBITS 32 +#define RATE_INBYTES ((RATE_INBITS + 7) / 8) +#define RATE_INBYTES_MASK (RATE_INBYTES - 1) +#define TAG_MATCH 0 +#define TAG_UNMATCH -1 +#define OTHER_FAILURES -2 + + +; For CRYPTO_HASH +#define CRYPTO_BYTES 32 +#define INITIAL_RATE_INBITS 128 +#define INITIAL_RATE_INBYTES ((INITIAL_RATE_INBITS + 7) / 8) +#define HASH_RATE_INBITS 32 +#define HASH_RATE_INBYTES ((HASH_RATE_INBITS + 7) / 8) +#define HASH_RATE_INBYTES_MASK (HASH_RATE_INBYTES - 1) + +; +; ============================================ +; R E G I S T E R D E F I N I T I O N S +; ============================================ +; +; [Add all register names here, include info on +; all used registers without specific names] +; rmp: Multipurpose register +#define rmp r16 +#define rate r17 +#define mclen r18 +#define radlen r19 +#define adlen_org r0 + +#define cnt0 r20 +#define cnt1 r21 +#define cnt2 r22 + +#define SQUEEZE_RATE_INBITS 128 +#define SQUEEZE_RATE_INBYTES ((SQUEEZE_RATE_INBITS + 7) / 8) +#define SQUEEZE_RATE_MASK (SQUEEZE_RATE_INBYTES - 1) + +#define CAPACITY_INBITS (STATE_INBITS - RATE_INBITS) +#define CAPACITY_INBYTES ((CAPACITY_INBITS + 7) / 8) + +; For CRYPTO_AEAD +#define KEY_INBITS (CRYPTO_KEYBYTES * 8) +#define KEY_INBYTES (CRYPTO_KEYBYTES) + +#define NONCE_INBITS (CRYPTO_NPUBBYTES * 8) +#define NONCE_INBYTES (CRYPTO_NPUBBYTES) + +#define TAG_INBITS (CRYPTO_ABYTES * 8) +#define TAG_INBYTES (CRYPTO_ABYTES) + +#define t0 r8 +#define t1 r9 +#define t2 r10 +#define t3 r11 + +#define x0 r12 +#define x1 r13 +#define x2 r14 +#define x3 r15 + +#define ed r1 + +#define addr0 r2 +#define addr1 r3 +#define addr2 r4 +#define addr3 r5 +#define addr4 r6 +#define addr5 r7 + +; domain_cnt overlap with cnt0, only temporarily used, no need to back up +#define domain_cnt r20 +#define domain_cnt0 r23 +#define domain_cnt1 r24 + +.section .noinit + SRAM_STATE: .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + SRAM_MESSAGE_OUT_ADDR: .BYTE 0, 0 + SRAM_MESSAGE_IN_ADDR: .BYTE 0, 0 + SRAM_MESSAGE_IN_LEN: .BYTE 0, 0 +#ifdef CRYPTO_AEAD +; For CRYPTO_AEAD + SRAM_ASSOCIATED_DATA_ADDR: .BYTE 0, 0 + SRAM_ADLEN: .BYTE 0, 0 + SRAM_NONCE_ADDR: .BYTE 0, 0 + SRAM_KEY_ADDR: .BYTE 0, 0 + SRAM_ADDITIONAL: .BYTE 0, 0, 0, 0 + .BYTE 0, 0, 0, 0 + .BYTE 0, 0, 0, 0 + .BYTE 0, 0, 0, 0 +#endif + +; SRAM required additionally, besides 
those used for API + SRAM_PAD: .BYTE 0, 0, 0, 0 +#if ((defined(CRYPTO_AEAD) && (RATE_INBYTES > 4)) || defined(CRYPTO_HASH)) + .BYTE 0, 0, 0, 0 + .BYTE 0, 0, 0, 0 + .BYTE 0, 0, 0, 0 +#endif + + +.section .text + +#include "assist.h" +#include "photon.h" + +AddDomainCounter: + ldi YH, hi8(SRAM_STATE + STATE_INBYTES - 3) + ldi YL, lo8(SRAM_STATE + STATE_INBYTES - 3) + ldi rmp, 0x80 + ldi cnt1, 3 +check_domain_bit: + ror domain_cnt + brcc no_xor + ld x0, Y + eor x0, rmp + st Y, x0 +no_xor: + adiw YL, 1 + dec cnt1 + brne check_domain_bit +ret + +; require XH:XL be the address of the current associated data/message block +XOR_to_State: + ldi YH, hi8(SRAM_STATE) + ldi YL, lo8(SRAM_STATE) + mov cnt0, rate + dec cnt0 +XOR_to_State_loop: + rcall Load_Reorder_32_bits + ld rmp, Y + eor rmp, x0 + st Y+, rmp + ld rmp, Y + eor rmp, x1 + st Y+, rmp + ld rmp, Y + eor rmp, x2 + st Y+, rmp + ld rmp, Y + eor rmp, x3 + st Y+, rmp + subi cnt0, 4 + brsh XOR_to_State_loop +; XH:XL are now the address of the next associated data/message block if this is not the last block +ret + +; require XH:XL pointed to the source data to be padded +PAD_OneZero: + ldi YH, hi8(SRAM_PAD) + ldi YL, lo8(SRAM_PAD) + mov cnt1, rate +pad_copy: + ld rmp, X+ + st Y+, rmp + dec cnt1 + dec cnt0 + brne pad_copy +pad_one: + ldi rmp, 1 + st Y+, rmp + dec cnt1 + breq pad_end + clr rmp +pad_zero: + st Y+, rmp + dec cnt1 + brne pad_zero +pad_end: + ldi XH, hi8(SRAM_PAD) + ldi XL, lo8(SRAM_PAD) +; XH:XL are now pointed to last block needed to be processed +ret + +HASH: + movw addr0, XL +hash_block_loop: + rcall PHOTON_Permutation + movw XL, addr0 + cp rate, radlen + brsh hash_last_block + rcall XOR_to_State + movw addr0, XL + sub radlen, rate + rjmp hash_block_loop + +hash_last_block: + cp radlen, rate + breq hash_xor_domain + mov cnt0, radlen + rcall PAD_OneZero + +hash_xor_domain: + clr radlen + rcall XOR_to_State + mov domain_cnt, domain_cnt0 + rcall AddDomainCounter +ret + +TAG: + rcall PHOTON_Permutation + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + movw YL, addr2 + rcall Load_invReorder_Store_128_bits +ret + +#ifdef CRYPTO_AEAD +.IF (RATE_INBITS == 128) +XOR_to_Cipher: + mov t2, rate + cp t2, mclen + brlo XOR_to_Cipher_Start + mov t2, mclen ; backup the real length of the remaining message + +XOR_to_Cipher_Start: + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + ldi YH, hi8(SRAM_ADDITIONAL) + ldi YL, lo8(SRAM_ADDITIONAL) + rcall Load_invReorder_Store_128_bits ; State move to additional SRAM and reorder + + movw XL, addr0 + movw ZL, addr2 + + ; XOR Part 2 + sbiw YL, (RATE_INBYTES>>1) ; Pointed to Part 2 + ldi cnt0, (RATE_INBYTES>>1) + cp cnt0, mclen + brlo XOR_Part2_Store_Cipher_begin + mov cnt0, mclen +XOR_Part2_Store_Cipher_begin: + sub mclen, cnt0 +XOR_Part2_Store_Cipher_loop: + ld t0, Y+ + ld x0, X+ + eor x0, t0 + st Z+, x0 + dec cnt0 + brne XOR_Part2_Store_Cipher_loop + + cpi mclen, 1 + brlo XOR_to_Cipher_END + + ; XOR (Part 1 >>> 1) + ldi cnt0, (RATE_INBYTES>>1) + cp cnt0, mclen + brlo XOR_Part1_Store_Cipher_begin + mov cnt0, mclen +XOR_Part1_Store_Cipher_begin: + sub mclen, cnt0 + ldi YH, hi8(SRAM_ADDITIONAL) + ldi YL, lo8(SRAM_ADDITIONAL) + ld t0, Y + bst t0, 0 + adiw YL, (RATE_INBYTES>>1)-1 + ld t0, Y + ror t0 + bld t0, 7 + st Y, t0 + ldi cnt1, (RATE_INBYTES>>1)-1 +ROR_part1_loop: + ld t0, -Y + ror t0 + st Y, t0 + dec cnt1 + brne ROR_part1_loop + +XOR_Part1_Store_Cipher_loop: + ld t0, Y+ + ld x0, X+ + eor x0, t0 + st Z+, x0 + dec cnt0 + brne XOR_Part1_Store_Cipher_loop + +XOR_to_Cipher_END: + tst ed + brne 
XOR_to_Cipher_dec + +XOR_to_Cipher_enc: + movw XL, addr0 + cp t2, rate + brsh XOR_to_Cipher_XOR_to_State + mov cnt0, t2 + rcall PAD_OneZero + rjmp XOR_to_Cipher_XOR_to_State + +XOR_to_Cipher_dec: + movw XL, addr2 + cp t2, rate + brsh XOR_to_Cipher_XOR_to_State + ; need to be padded + mov cnt0, t2 + rcall PAD_OneZero + +XOR_to_Cipher_XOR_to_State: + rcall XOR_to_State + + clr rmp + add addr0, t2 + adc addr1, rmp + add addr2, t2 + adc addr3, rmp +ret +.ELSE +; RATE_INBITS == 32 +XOR_to_Cipher: + mov t2, rate + cp t2, mclen + brlo XOR_to_Cipher_Start + mov t2, mclen ; backup the real length of the remaining message + +XOR_to_Cipher_Start: + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + ld x0, X+ + ld x1, X+ + ld x2, X+ + ld x3, X+ + ldi YH, hi8(SRAM_ADDITIONAL) + ldi YL, lo8(SRAM_ADDITIONAL) + rcall invReorder_Store_32_bits + + movw XL, addr0 + movw ZL, addr2 + + ; XOR Part 2 + sbiw YL, (RATE_INBYTES>>1) ; Pointed to Part 2 + ldi cnt0, (RATE_INBYTES>>1) + cp cnt0, mclen + brlo XOR_Part2_Store_Cipher_begin + mov cnt0, mclen +XOR_Part2_Store_Cipher_begin: + sub mclen, cnt0 +XOR_Part2_Store_Cipher_loop: + ld t0, Y+ + ld x0, X+ + eor x0, t0 + st Z+, x0 + dec cnt0 + brne XOR_Part2_Store_Cipher_loop + + cpi mclen, 1 + brlo XOR_to_Cipher_END + + ; XOR (Part 1 >>> 1) + ldi cnt0, (RATE_INBYTES>>1) + cp cnt0, mclen + brlo XOR_Part1_Store_Cipher_begin + mov cnt0, mclen +XOR_Part1_Store_Cipher_begin: + sub mclen, cnt0 + ldi YH, hi8(SRAM_ADDITIONAL) + ldi YL, lo8(SRAM_ADDITIONAL) + ld t0, Y+ + ld t1, Y+ + bst t0, 0 + ror t1 + ror t0 + bld t1, 7 + + ld x0, X+ + eor x0, t0 + st Z+, x0 + dec cnt0 + breq XOR_to_Cipher_END + ld x0, X+ + eor x0, t1 + st Z+, x0 + +XOR_to_Cipher_END: + tst ed + brne XOR_to_Cipher_dec + +XOR_to_Cipher_enc: + movw XL, addr0 + cp t2, rate + brsh XOR_to_Cipher_XOR_to_State + mov cnt0, t2 + rcall PAD_OneZero + rjmp XOR_to_Cipher_XOR_to_State + +XOR_to_Cipher_dec: + movw XL, addr2 + cp t2, rate + brsh XOR_to_Cipher_XOR_to_State + ; need to be padded + mov cnt0, t2 + rcall PAD_OneZero + +XOR_to_Cipher_XOR_to_State: + rcall XOR_to_State + + clr rmp + add addr0, t2 + adc addr1, rmp + add addr2, t2 + adc addr3, rmp +ret +.ENDIF + +ENC: + lds ZH, SRAM_MESSAGE_OUT_ADDR + lds ZL, SRAM_MESSAGE_OUT_ADDR + 1 + lds XH, SRAM_MESSAGE_IN_ADDR + lds XL, SRAM_MESSAGE_IN_ADDR + 1 + + movw addr0, XL + movw addr2, ZL + +enc_block_loop: + rcall PHOTON_Permutation + rcall XOR_to_Cipher + cpi mclen, 1 + brsh enc_block_loop + + mov domain_cnt, domain_cnt1 + rcall AddDomainCounter +ret + +AUTH_AND_ENCDEC: + ldi YH, hi8(SRAM_STATE) + ldi YL, lo8(SRAM_STATE) + + lds XH, SRAM_NONCE_ADDR + lds XL, SRAM_NONCE_ADDR + 1 + rcall Load_Reorder_Store_128_bits + lds XH, SRAM_KEY_ADDR + lds XL, SRAM_KEY_ADDR + 1 + rcall Load_Reorder_Store_128_bits + + ldi domain_cnt0, 1 + ldi domain_cnt1, 1 + +test_adlen_zero: + tst radlen + breq adlen_zero_test_mlen_zero + + ; radlen != 0 +adlen_nzero_test_mlen_zero: + tst mclen + brne test_adlen_divisible + ldi domain_cnt0, 3 +test_adlen_divisible: + mov rmp, radlen + andi rmp, RATE_INBYTES_MASK + breq hash_ad + inc domain_cnt0 ; 2 or 4 +hash_ad: + lds XH, SRAM_ASSOCIATED_DATA_ADDR + lds XL, SRAM_ASSOCIATED_DATA_ADDR + 1 + rcall HASH + tst mclen + breq mlen_zero_inputout_address + rjmp test_mlen_divisible + +adlen_zero_test_mlen_zero: + ldi domain_cnt1, 5 + tst mclen + breq adlen_zero_mlen_zero + + ; mclen != 0 +test_mlen_divisible: + mov rmp, mclen + andi rmp, RATE_INBYTES_MASK + breq enc_dec_m + inc domain_cnt1 ; 2 or 6 +enc_dec_m: + rcall ENC + rjmp AUTH_AND_ENCDEC_end + 
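+; ----------------------------------------------------------------------
+; Editor's sketch, not part of the original source: in this rate-32
+; build, XOR_to_Cipher above computes C = M xor Shuffle(W), where W is
+; the 4-byte rate part of the state after inverse bit-reordering,
+; W = W1 || W2 with 2-byte halves, and Shuffle(W) = W2 || (W1 >>> 1).
+; A plain-C restatement, assuming the little-endian byte packing the
+; code uses (w[], ks[], m_len are illustrative names):
+;
+;     uint16_t W1  = (uint16_t)((w[1] << 8) | w[0]);
+;     uint16_t rot = (uint16_t)((W1 >> 1) | (W1 << 15));  /* W1 >>> 1 */
+;     uint8_t  ks[4] = { w[2], w[3],                      /* W2 first */
+;                        (uint8_t)rot, (uint8_t)(rot >> 8) };
+;     for (unsigned i = 0; i < 4 && i < m_len; i++)
+;         c[i] = m[i] ^ ks[i];
+; ----------------------------------------------------------------------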
+adlen_zero_mlen_zero: + ; empty message and empty associated data + ldi YH, hi8(SRAM_STATE + STATE_INBYTES - 3) + ldi YL, lo8(SRAM_STATE + STATE_INBYTES - 3) + ld x0, Y + ldi rmp, 0x80 + eor x0, rmp + st Y, x0 + +mlen_zero_inputout_address: + tst ed + brne dec_inputout_address +enc_inputout_address: + lds ZH, SRAM_MESSAGE_OUT_ADDR + lds ZL, SRAM_MESSAGE_OUT_ADDR + 1 + movw addr2, ZL + rjmp AUTH_AND_ENCDEC_end +dec_inputout_address: + lds ZH, SRAM_MESSAGE_IN_ADDR + lds ZL, SRAM_MESSAGE_IN_ADDR + 1 + movw addr0, ZL + +AUTH_AND_ENCDEC_end: +ret + + +; void crypto_aead_encrypt_asm( +; unsigned char *c, +; const unsigned char *m, +; unsigned long long mlen, +; const unsigned char *ad, +; unsigned long long radlen, +; const unsigned char *npub, +; const unsigned char *k +; ) +; +; unsigned char *c, is passed in r24:r25 +; const unsigned char *m, is passed in r22:r23 +; unsigned long long mlen, is passed in r20:r21, only LSB (r20) is used +; const unsigned char *ad, is passed in r18:r19 +; unsigned long long radlen, is passed in r16:r17, only LSB (r16) is used +; const unsigned char *npub, is passed in r14:r15 +; const unsigned char *k is passed in r12:r13 +.global crypto_aead_encrypt_asm +crypto_aead_encrypt_asm: + PUSH_ALL + ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR) + ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR) + st X+, r25 ;store cipher address in SRAM_MESSAGE_OUT_ADDR + st X+, r24 + st X+, r23 ;store message address in SRAM_MESSAGE_IN_ADDR + st X+, r22 + st X+, r21 ;store message length in SRAM_MESSAGE_IN_LEN + st X+, r20 + st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR + st X+, r18 + st X+, r17 ;store associated data length in SRAM_ADLEN + st X+, r16 + st X+, r15 ;store nonce address in SRAM_NONCE_ADDR + st X+, r14 + st X+, r13 ;store key address in SRAM_KEY_ADDR + st X+, r12 + mov mclen, r20 + mov radlen, r16 + + ldi rate, RATE_INBYTES + clr ed + + rcall AUTH_AND_ENCDEC + rcall TAG + + POP_ALL +ret + +; int crypto_aead_decrypt_asm( +; unsigned char *m, +; const unsigned char *c, +; unsigned long long clen, +; const unsigned char *ad, +; unsigned long long radlen, +; const unsigned char *npub, +; const unsigned char *k +; ) +; +; unsigned char *m, is passed in r24:r25 +; const unsigned char *c, is passed in r22:r23 +; unsigned long long clen, is passed in r20:r21, only LSB (r20) is used +; const unsigned char *ad, is passed in r18:r19 +; unsigned long long radlen, is passed in r16:r17, only LSB (r16) is used +; const unsigned char *npub, is passed in r14:r15 +; const unsigned char *k is passed in r12:r13 +.global crypto_aead_decrypt_asm +crypto_aead_decrypt_asm: + PUSH_ALL + ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR) + ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR) + st X+, r25 ;store message address in SRAM_MESSAGE_OUT_ADDR + st X+, r24 + st X+, r23 ;store cipher address in SRAM_MESSAGE_IN_ADDR + st X+, r22 + st X+, r21 ;store cipher length in SRAM_MESSAGE_IN_LEN + st X+, r20 + st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR + st X+, r18 + st X+, r17 ;store associated data length in SRAM_ADLEN + st X+, r16 + st X+, r15 ;store nonce address in SRAM_NONCE_ADDR + st X+, r14 + st X+, r13 ;store key address in SRAM_KEY_ADDR + st X+, r12 + mov mclen, r20 + mov radlen, r16 + + ldi rate, RATE_INBYTES + clr ed + inc ed + + rcall AUTH_AND_ENCDEC + + ldi YH, hi8(SRAM_ADDITIONAL) + ldi YL, lo8(SRAM_ADDITIONAL) + movw addr2, YL + rcall TAG + + sbiw YL, CRYPTO_ABYTES + movw XL, addr0 + + ldi cnt0, CRYPTO_ABYTES +compare_tag: + ld t0, Y+ + ld x0, X+ + cp t0, x0 + brne return_tag_not_match + 
dec cnt0
+ brne compare_tag
+ rjmp return_tag_match
+
+return_tag_not_match:
+ ldi r25, 0xFF
+ ldi r24, 0xFF
+ rjmp crypto_aead_decrypt_end
+return_tag_match:
+ clr r25
+ clr r24
+
+crypto_aead_decrypt_end:
+ POP_ALL
+ret
+
+; #ifdef CRYPTO_AEAD
+#endif
+
+#ifdef CRYPTO_HASH
+; void crypto_hash_asm(
+; unsigned char *out,
+; const unsigned char *in,
+; unsigned long long inlen
+; )
+;
+; unsigned char *out, is passed in r24:r25
+; const unsigned char *in, is passed in r22:r23
+; unsigned long long inlen, is passed in r20:r21, only LSB (r20) is used
+.global crypto_hash_asm
+crypto_hash_asm:
+ PUSH_ALL
+ ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
+ ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
+ st X+, r25 ;store digest output address in SRAM_MESSAGE_OUT_ADDR
+ st X+, r24
+ st X+, r23 ;store message input address in SRAM_MESSAGE_IN_ADDR
+ st X+, r22
+ st X+, r21 ;store message length in SRAM_MESSAGE_IN_LEN
+ st X+, r20
+ mov mclen, r20
+
+ ; empty half state
+ ldi YH, hi8(SRAM_STATE + INITIAL_RATE_INBYTES)
+ ldi YL, lo8(SRAM_STATE + INITIAL_RATE_INBYTES)
+ clr rmp
+ ldi cnt1, (STATE_INBYTES - INITIAL_RATE_INBYTES)
+zero_state:
+ st Y+, rmp
+ dec cnt1
+ brne zero_state
+
+ ldi domain_cnt0, 1
+ sbiw YL, STATE_INBYTES
+ lds XH, SRAM_MESSAGE_IN_ADDR
+ lds XL, SRAM_MESSAGE_IN_ADDR + 1
+
+ tst mclen
+ breq add_domain
+
+test_mlen_initrate:
+ ; mclen != 0
+ cpi mclen, INITIAL_RATE_INBYTES
+ brlo less_than_initial_rate
+ breq equal_to_initial_rate
+
+more_than_initial_rate:
+ rcall Load_Reorder_Store_128_bits
+ ldi rate, HASH_RATE_INBYTES
+ mov radlen, mclen
+ subi radlen, INITIAL_RATE_INBYTES
+ mov rmp, radlen
+ andi rmp, HASH_RATE_INBYTES_MASK
+ breq hash_message
+ inc domain_cnt0
+hash_message:
+ rcall HASH
+ rjmp gen_digest
+
+equal_to_initial_rate:
+ inc domain_cnt0
+ rcall Load_Reorder_Store_128_bits
+ rjmp add_domain
+
+less_than_initial_rate:
+ mov cnt0, mclen
+ ldi rate, INITIAL_RATE_INBYTES
+ rcall PAD_OneZero
+ ldi YH, hi8(SRAM_STATE)
+ ldi YL, lo8(SRAM_STATE)
+ rcall Load_Reorder_Store_128_bits
+ rjmp add_domain
+
+add_domain:
+ mov domain_cnt, domain_cnt0
+ rcall AddDomainCounter
+gen_digest:
+ lds XH, SRAM_MESSAGE_OUT_ADDR
+ lds XL, SRAM_MESSAGE_OUT_ADDR + 1
+ movw addr2, XL
+ rcall TAG
+ movw XL, addr2
+ adiw XL, SQUEEZE_RATE_INBYTES
+ movw addr2, XL
+ rcall TAG
+
+ POP_ALL
+ret
+
+#endif
\ No newline at end of file
diff --git a/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/avr8_speed/photon.h b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/avr8_speed/photon.h
new file mode 100644
index 0000000..d764e16
--- /dev/null
+++ b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/avr8_speed/photon.h
@@ -0,0 +1,714 @@
+;
+; **********************************************
+; * PHOTON-Beetle *
+; * Authenticated Encryption and Hash Family *
+; * *
+; * Assembly implementation for 8-bit AVR CPU *
+; * Version 1.1 2020 by PHOTON-Beetle Team *
+; **********************************************
+;
+#define ROUND_N 12
+#define DIM 8
+
+.MACRO Store_OneRow
+ st X+, x0
+ st X+, x1
+ st X+, x2
+ st X+, x3
+.ENDM
+
+.MACRO ROTL_1 i0
+ bst \i0, 7
+ lsl \i0
+ bld \i0, 0
+.ENDM
+
+.MACRO ROTR_1 i0
+ bst \i0, 0
+ lsr \i0
+ bld \i0, 7
+.ENDM
+
+.MACRO ROTR_4 i0
+ swap \i0
+.ENDM
+
+.MACRO ROTR_1_ROW
+ ROTR_1 x0
+ ROTR_1 x1
+ ROTR_1 x2
+ ROTR_1 x3
+.ENDM
+
+.MACRO ROTL_1_ROW
+ ROTL_1 x0
+ ROTL_1 x1
+ ROTL_1 x2
+ ROTL_1 x3
+.ENDM
+
+.MACRO ROTR_4_ROW
+ ROTR_4 x0
+ ROTR_4 x1
+ ROTR_4 x2
+ ROTR_4 x3
+.ENDM
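+
+
+; The mulN_GF16_0x13_xor macros below accumulate, into x0..x3, the product
+; of a loaded state column by the MixColumns coefficient N, where the
+; multiplication is in GF(2^4) with reduction polynomial x^4 + x + 1 (0x13)
+; and is expressed as XORs on the four bitslice planes t0..t3, as spelled
+; out in the C-style comments attached to each macro.
+;
+; For all mul2_GF16_0x13_xor:
+; Input
+;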
MSB........LSB +; x0=@0: x1=@1: x2=@2: x3=@3 + ; # define mul2_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \ + ; x3 = XOR (x3 ,x0); \ + ; } while (0) ; /* Output : ( MSB ) x1 ,x2 ,x3 , x0 ( LSB ) */ +.MACRO mul2_GF16_0x13_xor + ld t3, X+ + ld t2, X+ + ld t1, X+ + ld t0, X+ + eor t3, t0 + eor x0, t0 + eor x1, t3 + eor x2, t2 + eor x3, t1 +.ENDM + + ; # define mul4_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \ + ; x3 = XOR (x3 ,x0); x0 = XOR (x0 ,x1); \ + ; } while (0) ; /* Output : ( MSB ) x2 ,x3 ,x0 , x1 ( LSB ) */ +.MACRO mul4_GF16_0x13_xor + ld t3, X+ + ld t2, X+ + ld t1, X+ + ld t0, X+ + eor t3, t0 + eor t0, t1 + eor x0, t1 + eor x1, t0 + eor x2, t3 + eor x3, t2 +.ENDM + + ; # define mul5_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \ + ; x2 = XOR (x2 ,x0); x3 = XOR (x3 ,x1); \ + ; x1 = XOR (x1 ,x2); x0 = XOR (x0 ,x3); \ + ; } while (0) ; /* Output : ( MSB ) x2 ,x0 ,x1 , x3 ( LSB ) */ +.MACRO mul5_GF16_0x13_xor + ld t3, X+ + ld t2, X+ + ld t1, X+ + ld t0, X+ + eor t2, t0 + eor t3, t1 + eor t1, t2 + eor t0, t3 + eor x0, t3 + eor x1, t1 + eor x2, t0 + eor x3, t2 +.ENDM + + ; # define mul6_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \ + ; x3 = XOR (x3 ,x1); x1 = XOR (x1 ,x0); \ + ; x2 = XOR (x2 ,x1); x0 = XOR (x0 ,x2); \ + ; x2 = XOR (x2 ,x3); \ + ; } while (0) ; /* Output : ( MSB ) x0 ,x2 ,x3 , x1 ( LSB ) */ +.MACRO mul6_GF16_0x13_xor + ld t3, X+ + ld t2, X+ + ld t1, X+ + ld t0, X+ + eor t3, t1 + eor t1, t0 + eor t2, t1 + eor t0, t2 + eor t2, t3 + eor x0, t1 + eor x1, t3 + eor x2, t2 + eor x3, t0 +.ENDM + + ; # define mul8_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \ + ; x3 = XOR (x3 ,x0); x0 = XOR (x0 ,x1); \ + ; x1 = XOR (x1 ,x2); \ + ; } while (0) ; /* Output : ( MSB ) x3 ,x0 ,x1 , x2 ( LSB ) */ +.MACRO mul8_GF16_0x13_xor + ld t3, X+ + ld t2, X+ + ld t1, X+ + ld t0, X+ + eor t3, t0 + eor t0, t1 + eor t1, t2 + eor x0, t2 + eor x1, t1 + eor x2, t0 + eor x3, t3 +.ENDM + + ; # define mul11_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \ + ; x2 = XOR (x2 ,x0); x1 = XOR (x1 ,x3); \ + ; x0 = XOR (x0 ,x1); x3 = XOR (x3 ,x2); \ + ; } while (0) ; /* Output : ( MSB ) x1 ,x2 ,x0 , x3 ( LSB ) */ +.MACRO mulb_GF16_0x13_xor + ld t3, X+ + ld t2, X+ + ld t1, X+ + ld t0, X+ + eor t2, t0 + eor t1, t3 + eor t0, t1 + eor t3, t2 + eor x0, t3 + eor x1, t0 + eor x2, t2 + eor x3, t1 +.ENDM + + +.MACRO RoundFunction +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Start AddRC_Sbox_ShiftRow +AddRC_Sbox_ShiftRow_Start: + clr t3 + inc t3 + + ldi XL, lo8(SRAM_STATE) + ldi XH, hi8(SRAM_STATE) + + lpm t0, Z+ ; Load two nibbles of round constant for row 0, 1 + ; AddRC_TwoRows + ld x0, X+ + ld x1, X+ + ld x2, X+ + ld x3, X+ + sbiw XL, 4 + + ror t0 + brcc row0_next1 + eor x0, t3 +row0_next1: + ror t0 + brcc row0_next2 + eor x1, t3 +row0_next2: + ror t0 + brcc row0_next3 + eor x2, t3 +row0_next3: + ror t0 + brcc row0_next4 + eor x3, t3 +row0_next4: + ; Sbox_TwoRows + eor x1, x2 + mov t1, x2 + and t1, x1 + eor x3, t1 + mov t1, x3 + and x3, x1 + eor x3, x2 + mov t2, x3 + eor x3, x0 + com x3 + mov x2, x3 + or t2, x0 + eor x0, t1 + eor x1, x0 + or x2, x1 + eor x2, t1 + eor x1, t2 + eor x3, x1 + Store_OneRow + + ; AddRC_TwoRows + ld x0, X+ + ld x1, X+ + ld x2, X+ + ld x3, X+ + sbiw XL, 4 + + ror t0 + brcc row1_next1 + eor x0, t3 +row1_next1: + ror t0 + brcc row1_next2 + eor x1, t3 +row1_next2: + ror t0 + brcc row1_next3 + eor x2, t3 +row1_next3: + ror t0 + brcc row1_next4 + eor x3, t3 +row1_next4: + ; Sbox_TwoRows + eor x1, x2 + mov t1, x2 + and t1, x1 + eor x3, t1 + mov t1, x3 + and x3, x1 + eor x3, x2 + mov t2, x3 + eor x3, x0 + com x3 + mov x2, x3 + or t2, x0 + eor x0, t1 + eor x1, x0 + or x2, x1 + eor x2, t1 
+ eor x1, t2 + eor x3, x1 + + ROTR_1_ROW + Store_OneRow + + lpm t0, Z+ ; Load two nibbles of round constant for row 2i, 2i+1 + ; AddRC_TwoRows + ld x0, X+ + ld x1, X+ + ld x2, X+ + ld x3, X+ + sbiw XL, 4 + + ror t0 + brcc row2_next1 + eor x0, t3 +row2_next1: + ror t0 + brcc row2_next2 + eor x1, t3 +row2_next2: + ror t0 + brcc row2_next3 + eor x2, t3 +row2_next3: + ror t0 + brcc row2_next4 + eor x3, t3 +row2_next4: + ; Sbox_TwoRows + eor x1, x2 + mov t1, x2 + and t1, x1 + eor x3, t1 + mov t1, x3 + and x3, x1 + eor x3, x2 + mov t2, x3 + eor x3, x0 + com x3 + mov x2, x3 + or t2, x0 + eor x0, t1 + eor x1, x0 + or x2, x1 + eor x2, t1 + eor x1, t2 + eor x3, x1 + + ROTR_1_ROW + ROTR_1_ROW + Store_OneRow + + ; AddRC_TwoRows + ld x0, X+ + ld x1, X+ + ld x2, X+ + ld x3, X+ + sbiw XL, 4 + + ror t0 + brcc row3_next1 + eor x0, t3 +row3_next1: + ror t0 + brcc row3_next2 + eor x1, t3 +row3_next2: + ror t0 + brcc row3_next3 + eor x2, t3 +row3_next3: + ror t0 + brcc row3_next4 + eor x3, t3 +row3_next4: + ; Sbox_TwoRows + eor x1, x2 + mov t1, x2 + and t1, x1 + eor x3, t1 + mov t1, x3 + and x3, x1 + eor x3, x2 + mov t2, x3 + eor x3, x0 + com x3 + mov x2, x3 + or t2, x0 + eor x0, t1 + eor x1, x0 + or x2, x1 + eor x2, t1 + eor x1, t2 + eor x3, x1 + + ROTR_4_ROW + ROTL_1_ROW + Store_OneRow + + lpm t0, Z+ ; Load two nibbles of round constant for row 2i, 2i+1 + ; AddRC_TwoRows + ld x0, X+ + ld x1, X+ + ld x2, X+ + ld x3, X+ + sbiw XL, 4 + + ror t0 + brcc row4_next1 + eor x0, t3 +row4_next1: + ror t0 + brcc row4_next2 + eor x1, t3 +row4_next2: + ror t0 + brcc row4_next3 + eor x2, t3 +row4_next3: + ror t0 + brcc row4_next4 + eor x3, t3 +row4_next4: + ; Sbox_TwoRows + eor x1, x2 + mov t1, x2 + and t1, x1 + eor x3, t1 + mov t1, x3 + and x3, x1 + eor x3, x2 + mov t2, x3 + eor x3, x0 + com x3 + mov x2, x3 + or t2, x0 + eor x0, t1 + eor x1, x0 + or x2, x1 + eor x2, t1 + eor x1, t2 + eor x3, x1 + + ROTR_4_ROW + Store_OneRow + + ; AddRC_TwoRows + ld x0, X+ + ld x1, X+ + ld x2, X+ + ld x3, X+ + sbiw XL, 4 + + ror t0 + brcc row5_next1 + eor x0, t3 +row5_next1: + ror t0 + brcc row5_next2 + eor x1, t3 +row5_next2: + ror t0 + brcc row5_next3 + eor x2, t3 +row5_next3: + ror t0 + brcc row5_next4 + eor x3, t3 +row5_next4: + ; Sbox_TwoRows + eor x1, x2 + mov t1, x2 + and t1, x1 + eor x3, t1 + mov t1, x3 + and x3, x1 + eor x3, x2 + mov t2, x3 + eor x3, x0 + com x3 + mov x2, x3 + or t2, x0 + eor x0, t1 + eor x1, x0 + or x2, x1 + eor x2, t1 + eor x1, t2 + eor x3, x1 + + ROTR_4_ROW + ROTR_1_ROW + Store_OneRow + + lpm t0, Z+ ; Load two nibbles of round constant for row 2i, 2i+1 + ; AddRC_TwoRows + ld x0, X+ + ld x1, X+ + ld x2, X+ + ld x3, X+ + sbiw XL, 4 + + ror t0 + brcc row6_next1 + eor x0, t3 +row6_next1: + ror t0 + brcc row6_next2 + eor x1, t3 +row6_next2: + ror t0 + brcc row6_next3 + eor x2, t3 +row6_next3: + ror t0 + brcc row6_next4 + eor x3, t3 +row6_next4: + ; Sbox_TwoRows + eor x1, x2 + mov t1, x2 + and t1, x1 + eor x3, t1 + mov t1, x3 + and x3, x1 + eor x3, x2 + mov t2, x3 + eor x3, x0 + com x3 + mov x2, x3 + or t2, x0 + eor x0, t1 + eor x1, x0 + or x2, x1 + eor x2, t1 + eor x1, t2 + eor x3, x1 + + ROTL_1_ROW + ROTL_1_ROW + Store_OneRow + + ; AddRC_TwoRows + ld x0, X+ + ld x1, X+ + ld x2, X+ + ld x3, X+ + sbiw XL, 4 + + ror t0 + brcc row7_next1 + eor x0, t3 +row7_next1: + ror t0 + brcc row7_next2 + eor x1, t3 +row7_next2: + ror t0 + brcc row7_next3 + eor x2, t3 +row7_next3: + ror t0 + brcc row7_next4 + eor x3, t3 +row7_next4: + ; Sbox_TwoRows + eor x1, x2 + mov t1, x2 + and t1, x1 + eor x3, t1 + mov t1, x3 + and x3, x1 + eor 
x3, x2 + mov t2, x3 + eor x3, x0 + com x3 + mov x2, x3 + or t2, x0 + eor x0, t1 + eor x1, x0 + or x2, x1 + eor x2, t1 + eor x1, t2 + eor x3, x1 + + ROTL_1_ROW + Store_OneRow + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Start MixColumns +MC_Start: + + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + movw YL, XL +A0: + clr x0 + clr x1 + clr x2 + clr x3 + mul2_GF16_0x13_xor + mul4_GF16_0x13_xor + mul2_GF16_0x13_xor + mulb_GF16_0x13_xor + mul2_GF16_0x13_xor + mul8_GF16_0x13_xor + mul5_GF16_0x13_xor + mul6_GF16_0x13_xor + movw XL, YL + Store_OneRow + +A1: + clr x0 + clr x1 + clr x2 + clr x3 + mul2_GF16_0x13_xor + mul4_GF16_0x13_xor + mul2_GF16_0x13_xor + mulb_GF16_0x13_xor + mul2_GF16_0x13_xor + mul8_GF16_0x13_xor + mul5_GF16_0x13_xor + movw XL, YL + mul6_GF16_0x13_xor + Store_OneRow + +A2: + clr x0 + clr x1 + clr x2 + clr x3 + mul2_GF16_0x13_xor + mul4_GF16_0x13_xor + mul2_GF16_0x13_xor + mulb_GF16_0x13_xor + mul2_GF16_0x13_xor + mul8_GF16_0x13_xor + movw XL, YL + mul5_GF16_0x13_xor + mul6_GF16_0x13_xor + Store_OneRow + +A3: + clr x0 + clr x1 + clr x2 + clr x3 + mul2_GF16_0x13_xor + mul4_GF16_0x13_xor + mul2_GF16_0x13_xor + mulb_GF16_0x13_xor + mul2_GF16_0x13_xor + movw XL, YL + mul8_GF16_0x13_xor + mul5_GF16_0x13_xor + mul6_GF16_0x13_xor + Store_OneRow + +A4: + clr x0 + clr x1 + clr x2 + clr x3 + mul2_GF16_0x13_xor + mul4_GF16_0x13_xor + mul2_GF16_0x13_xor + mulb_GF16_0x13_xor + movw XL, YL + mul2_GF16_0x13_xor + mul8_GF16_0x13_xor + mul5_GF16_0x13_xor + mul6_GF16_0x13_xor + Store_OneRow + +A5: + clr x0 + clr x1 + clr x2 + clr x3 + mul2_GF16_0x13_xor + mul4_GF16_0x13_xor + mul2_GF16_0x13_xor + movw XL, YL + mulb_GF16_0x13_xor + mul2_GF16_0x13_xor + mul8_GF16_0x13_xor + mul5_GF16_0x13_xor + mul6_GF16_0x13_xor + Store_OneRow + +A6: + clr x0 + clr x1 + clr x2 + clr x3 + mul2_GF16_0x13_xor + mul4_GF16_0x13_xor + movw XL, YL + mul2_GF16_0x13_xor + mulb_GF16_0x13_xor + mul2_GF16_0x13_xor + mul8_GF16_0x13_xor + mul5_GF16_0x13_xor + mul6_GF16_0x13_xor + Store_OneRow + +A7: + clr x0 + clr x1 + clr x2 + clr x3 + mul2_GF16_0x13_xor + movw XL, YL + mul4_GF16_0x13_xor + mul2_GF16_0x13_xor + mulb_GF16_0x13_xor + mul2_GF16_0x13_xor + mul8_GF16_0x13_xor + mul5_GF16_0x13_xor + mul6_GF16_0x13_xor + Store_OneRow +.ENDM + +PHOTON_Permutation: + ldi ZH, hi8(RC) + ldi ZL, lo8(RC) + ldi cnt2, ROUND_N +round_loop_start: + RoundFunction + dec cnt2 + breq round_loop_end + jmp round_loop_start +round_loop_end: +ret + +.section .text +RC: +.byte 0x01,0x62,0xFE,0x9D +.byte 0x23,0x40,0xDC,0xBF +.byte 0x67,0x04,0x98,0xFB +.byte 0xFE,0x9D,0x01,0x62 +.byte 0xCD,0xAE,0x32,0x51 +.byte 0xAB,0xC8,0x54,0x37 +.byte 0x76,0x15,0x89,0xEA +.byte 0xDC,0xBF,0x23,0x40 +.byte 0x89,0xEA,0x76,0x15 +.byte 0x32,0x51,0xCD,0xAE +.byte 0x45,0x26,0xBA,0xD9 +.byte 0xBA,0xD9,0x45,0x26 diff --git a/photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/avr8_speed/api.h b/photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/avr8_speed/api.h new file mode 100644 index 0000000..cb530c7 --- /dev/null +++ b/photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/avr8_speed/api.h @@ -0,0 +1 @@ +#define CRYPTO_BYTES 32 \ No newline at end of file diff --git a/photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/avr8_speed/assist.h b/photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/avr8_speed/assist.h new file mode 100644 index 0000000..fdf3c68 --- /dev/null +++ b/photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/avr8_speed/assist.h @@ -0,0 +1,140 @@ +; +; 
********************************************** +; * PHOTON-Beetle * +; * Authenticated Encryption and Hash Family * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.0 2020 by PHOTON-Beetle Team * +; ********************************************** +; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Bitslice +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +.MACRO Reorder_8_bits i0, i1, i2, i3, i4 + ror \i0 + ror \i1 + ror \i0 + ror \i2 + ror \i0 + ror \i3 + ror \i0 + ror \i4 + ror \i0 + ror \i1 + ror \i0 + ror \i2 + ror \i0 + ror \i3 + ror \i0 + ror \i4 +.ENDM + +.MACRO InvReorder_8_bits i0, i1, i2, i3, i4 + ror \i1 + ror \i0 + ror \i2 + ror \i0 + ror \i3 + ror \i0 + ror \i4 + ror \i0 + ror \i1 + ror \i0 + ror \i2 + ror \i0 + ror \i3 + ror \i0 + ror \i4 + ror \i0 +.ENDM + +; require XH:XL be the address of the input +Load_Reorder_32_bits: + ldi cnt1, 4 +reorder_8_bits_loop: + ld rmp, X+ + Reorder_8_bits rmp, x0, x1, x2, x3 + dec cnt1 + brne reorder_8_bits_loop +ret + +; require YH:YL be the address of the output +invReorder_Store_32_bits: + ldi cnt1, 4 +invreorder_8_bits_loop: + InvReorder_8_bits rmp, x0, x1, x2, x3 + st Y+, rmp + dec cnt1 + brne invreorder_8_bits_loop +ret + +; require XH:XL be the address of the input +; require YH:YL be the address of the output +Load_Reorder_Store_128_bits: + ldi cnt0, 4 +reorder_32_bits_loop: + rcall Load_Reorder_32_bits + st Y+, x0 + st Y+, x1 + st Y+, x2 + st Y+, x3 + dec cnt0 + brne reorder_32_bits_loop +ret + +; require XH:XL be the address of the input +; require YH:YL be the address of the output +Load_invReorder_Store_128_bits: + ldi cnt0, 4 +invreorder_32_bits_loop: + ld x0, X+ + ld x1, X+ + ld x2, X+ + ld x3, X+ + rcall invReorder_Store_32_bits + dec cnt0 + brne invreorder_32_bits_loop +ret + +.macro PUSH_ALL +push r2 +push r3 +push r4 +push r5 +push r6 +push r7 +push r8 +push r9 +push r10 +push r11 +push r12 +push r13 +push r14 +push r15 +push r16 +push r17 +push r28 +push r29 +.endm + +.macro POP_ALL +pop r29 +pop r28 +pop r17 +pop r16 +pop r15 +pop r14 +pop r13 +pop r12 +pop r11 +pop r10 +pop r9 +pop r8 +pop r7 +pop r6 +pop r5 +pop r4 +pop r3 +pop r2 +clr r1 +.endm diff --git a/photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/avr8_speed/crypto_hash.h b/photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/avr8_speed/crypto_hash.h new file mode 100644 index 0000000..342a639 --- /dev/null +++ b/photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/avr8_speed/crypto_hash.h @@ -0,0 +1,13 @@ +#ifdef __cplusplus +extern "C" { +#endif + +int crypto_hash( + unsigned char *out, + const unsigned char *in, + unsigned long long inlen + ); + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/avr8_speed/encrypt_core.S b/photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/avr8_speed/encrypt_core.S new file mode 100644 index 0000000..dabe731 --- /dev/null +++ b/photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/avr8_speed/encrypt_core.S @@ -0,0 +1,734 @@ +; +; ********************************************** +; * PHOTON-Beetle * +; * Authenticated Encryption and Hash Family * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.0 2020 by PHOTON-Beetle Team * +; ********************************************** +; +#include "api.h" + +;#define CRYPTO_AEAD +#define CRYPTO_HASH + +#define STATE_INBITS 256 +#define STATE_INBYTES ((STATE_INBITS + 7) / 8) +#define RATE_INBITS 
128 +#define RATE_INBYTES ((RATE_INBITS + 7) / 8) +#define RATE_INBYTES_MASK (RATE_INBYTES - 1) +#define TAG_MATCH 0 +#define TAG_UNMATCH -1 +#define OTHER_FAILURES -2 + + +; For CRYPTO_HASH +#define INITIAL_RATE_INBITS 128 +#define INITIAL_RATE_INBYTES ((INITIAL_RATE_INBITS + 7) / 8) +#define HASH_RATE_INBITS 32 +#define HASH_RATE_INBYTES ((HASH_RATE_INBITS + 7) / 8) +#define HASH_RATE_INBYTES_MASK (HASH_RATE_INBYTES - 1) + +; +; ============================================ +; R E G I S T E R D E F I N I T I O N S +; ============================================ +; +; [Add all register names here, include info on +; all used registers without specific names] +; rmp: Multipurpose register +#define rmp r16 +#define rate r17 +#define mclen r18 +#define radlen r19 +#define adlen_org r0 + +#define cnt0 r20 +#define cnt1 r21 +#define cnt2 r22 + +#define SQUEEZE_RATE_INBITS 128 +#define SQUEEZE_RATE_INBYTES ((SQUEEZE_RATE_INBITS + 7) / 8) +#define SQUEEZE_RATE_MASK (SQUEEZE_RATE_INBYTES - 1) + +#define CAPACITY_INBITS (STATE_INBITS - RATE_INBITS) +#define CAPACITY_INBYTES ((CAPACITY_INBITS + 7) / 8) + +; For CRYPTO_AEAD +#define KEY_INBITS (CRYPTO_KEYBYTES * 8) +#define KEY_INBYTES (CRYPTO_KEYBYTES) + +#define NONCE_INBITS (CRYPTO_NPUBBYTES * 8) +#define NONCE_INBYTES (CRYPTO_NPUBBYTES) + +#define TAG_INBITS (CRYPTO_ABYTES * 8) +#define TAG_INBYTES (CRYPTO_ABYTES) + +#define t0 r8 +#define t1 r9 +#define t2 r10 +#define t3 r11 + +#define x0 r12 +#define x1 r13 +#define x2 r14 +#define x3 r15 + +#define ed r1 + +#define addr0 r2 +#define addr1 r3 +#define addr2 r4 +#define addr3 r5 +#define addr4 r6 +#define addr5 r7 + +; domain_cnt overlap with cnt0, only temporarily used, no need to back up +#define domain_cnt r20 +#define domain_cnt0 r23 +#define domain_cnt1 r24 + +.section .noinit + SRAM_STATE: .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + .BYTE 0, 0, 0, 0, 0, 0, 0, 0 + SRAM_MESSAGE_OUT_ADDR: .BYTE 0, 0 + SRAM_MESSAGE_IN_ADDR: .BYTE 0, 0 + SRAM_MESSAGE_IN_LEN: .BYTE 0, 0 +#ifdef CRYPTO_AEAD +; For CRYPTO_AEAD + SRAM_ASSOCIATED_DATA_ADDR: .BYTE 0, 0 + SRAM_ADLEN: .BYTE 0, 0 + SRAM_NONCE_ADDR: .BYTE 0, 0 + SRAM_KEY_ADDR: .BYTE 0, 0 + SRAM_ADDITIONAL: .BYTE 0, 0, 0, 0 + .BYTE 0, 0, 0, 0 + .BYTE 0, 0, 0, 0 + .BYTE 0, 0, 0, 0 +#endif + +; SRAM required additionally, besides those used for API + SRAM_PAD: .BYTE 0, 0, 0, 0 +#if ((defined(CRYPTO_AEAD) && (RATE_INBYTES > 4)) || defined(CRYPTO_HASH)) + .BYTE 0, 0, 0, 0 + .BYTE 0, 0, 0, 0 + .BYTE 0, 0, 0, 0 +#endif + + +.section .text + +#include "assist.h" +#include "photon.h" + +AddDomainCounter: + ldi YH, hi8(SRAM_STATE + STATE_INBYTES - 3) + ldi YL, lo8(SRAM_STATE + STATE_INBYTES - 3) + ldi rmp, 0x80 + ldi cnt1, 3 +check_domain_bit: + ror domain_cnt + brcc no_xor + ld x0, Y + eor x0, rmp + st Y, x0 +no_xor: + adiw YL, 1 + dec cnt1 + brne check_domain_bit +ret + +; require XH:XL be the address of the current associated data/message block +XOR_to_State: + ldi YH, hi8(SRAM_STATE) + ldi YL, lo8(SRAM_STATE) + mov cnt0, rate + dec cnt0 +XOR_to_State_loop: + rcall Load_Reorder_32_bits + ld rmp, Y + eor rmp, x0 + st Y+, rmp + ld rmp, Y + eor rmp, x1 + st Y+, rmp + ld rmp, Y + eor rmp, x2 + st Y+, rmp + ld rmp, Y + eor rmp, x3 + st Y+, rmp + subi cnt0, 4 + brsh XOR_to_State_loop +; XH:XL are now the address of the next associated data/message block if this is not the last block +ret + +; require XH:XL pointed to the source data to be padded +PAD_OneZero: + ldi YH, hi8(SRAM_PAD) + ldi YL, lo8(SRAM_PAD) + mov cnt1, 
rate +pad_copy: + ld rmp, X+ + st Y+, rmp + dec cnt1 + dec cnt0 + brne pad_copy +pad_one: + ldi rmp, 1 + st Y+, rmp + dec cnt1 + breq pad_end + clr rmp +pad_zero: + st Y+, rmp + dec cnt1 + brne pad_zero +pad_end: + ldi XH, hi8(SRAM_PAD) + ldi XL, lo8(SRAM_PAD) +; XH:XL are now pointed to last block needed to be processed +ret + +HASH: + movw addr0, XL +hash_block_loop: + rcall PHOTON_Permutation + movw XL, addr0 + cp rate, radlen + brsh hash_last_block + rcall XOR_to_State + movw addr0, XL + sub radlen, rate + rjmp hash_block_loop + +hash_last_block: + cp radlen, rate + breq hash_xor_domain + mov cnt0, radlen + rcall PAD_OneZero + +hash_xor_domain: + clr radlen + rcall XOR_to_State + mov domain_cnt, domain_cnt0 + rcall AddDomainCounter +ret + +TAG: + rcall PHOTON_Permutation + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + movw YL, addr2 + rcall Load_invReorder_Store_128_bits +ret + +#ifdef CRYPTO_AEAD +.IF (RATE_INBITS == 128) +XOR_to_Cipher: + mov t2, rate + cp t2, mclen + brlo XOR_to_Cipher_Start + mov t2, mclen ; backup the real length of the remaining message + +XOR_to_Cipher_Start: + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + ldi YH, hi8(SRAM_ADDITIONAL) + ldi YL, lo8(SRAM_ADDITIONAL) + rcall Load_invReorder_Store_128_bits ; State move to additional SRAM and reorder + + movw XL, addr0 + movw ZL, addr2 + + ; XOR Part 2 + sbiw YL, (RATE_INBYTES>>1) ; Pointed to Part 2 + ldi cnt0, (RATE_INBYTES>>1) + cp cnt0, mclen + brlo XOR_Part2_Store_Cipher_begin + mov cnt0, mclen +XOR_Part2_Store_Cipher_begin: + sub mclen, cnt0 +XOR_Part2_Store_Cipher_loop: + ld t0, Y+ + ld x0, X+ + eor x0, t0 + st Z+, x0 + dec cnt0 + brne XOR_Part2_Store_Cipher_loop + + cpi mclen, 1 + brlo XOR_to_Cipher_END + + ; XOR (Part 1 >>> 1) + ldi cnt0, (RATE_INBYTES>>1) + cp cnt0, mclen + brlo XOR_Part1_Store_Cipher_begin + mov cnt0, mclen +XOR_Part1_Store_Cipher_begin: + sub mclen, cnt0 + ldi YH, hi8(SRAM_ADDITIONAL) + ldi YL, lo8(SRAM_ADDITIONAL) + ld t0, Y + bst t0, 0 + adiw YL, (RATE_INBYTES>>1)-1 + ld t0, Y + ror t0 + bld t0, 7 + st Y, t0 + ldi cnt1, (RATE_INBYTES>>1)-1 +ROR_part1_loop: + ld t0, -Y + ror t0 + st Y, t0 + dec cnt1 + brne ROR_part1_loop + +XOR_Part1_Store_Cipher_loop: + ld t0, Y+ + ld x0, X+ + eor x0, t0 + st Z+, x0 + dec cnt0 + brne XOR_Part1_Store_Cipher_loop + +XOR_to_Cipher_END: + tst ed + brne XOR_to_Cipher_dec + +XOR_to_Cipher_enc: + movw XL, addr0 + cp t2, rate + brsh XOR_to_Cipher_XOR_to_State + mov cnt0, t2 + rcall PAD_OneZero + rjmp XOR_to_Cipher_XOR_to_State + +XOR_to_Cipher_dec: + movw XL, addr2 + cp t2, rate + brsh XOR_to_Cipher_XOR_to_State + ; need to be padded + mov cnt0, t2 + rcall PAD_OneZero + +XOR_to_Cipher_XOR_to_State: + rcall XOR_to_State + + clr rmp + add addr0, t2 + adc addr1, rmp + add addr2, t2 + adc addr3, rmp +ret +.ELSE +; RATE_INBITS == 32 +XOR_to_Cipher: + mov t2, rate + cp t2, mclen + brlo XOR_to_Cipher_Start + mov t2, mclen ; backup the real length of the remaining message + +XOR_to_Cipher_Start: + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + ld x0, X+ + ld x1, X+ + ld x2, X+ + ld x3, X+ + ldi YH, hi8(SRAM_ADDITIONAL) + ldi YL, lo8(SRAM_ADDITIONAL) + rcall invReorder_Store_32_bits + + movw XL, addr0 + movw ZL, addr2 + + ; XOR Part 2 + sbiw YL, (RATE_INBYTES>>1) ; Pointed to Part 2 + ldi cnt0, (RATE_INBYTES>>1) + cp cnt0, mclen + brlo XOR_Part2_Store_Cipher_begin + mov cnt0, mclen +XOR_Part2_Store_Cipher_begin: + sub mclen, cnt0 +XOR_Part2_Store_Cipher_loop: + ld t0, Y+ + ld x0, X+ + eor x0, t0 + st Z+, x0 + dec cnt0 + brne XOR_Part2_Store_Cipher_loop 
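+; Part 2 of the rate has now been folded into the output; the code below
+; rotates Part 1 right by one bit inside SRAM_ADDITIONAL before XORing it,
+; so the block is combined as C = M xor (S2 || (S1 >>> 1)).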
+ + cpi mclen, 1 + brlo XOR_to_Cipher_END + + ; XOR (Part 1 >>> 1) + ldi cnt0, (RATE_INBYTES>>1) + cp cnt0, mclen + brlo XOR_Part1_Store_Cipher_begin + mov cnt0, mclen +XOR_Part1_Store_Cipher_begin: + sub mclen, cnt0 + ldi YH, hi8(SRAM_ADDITIONAL) + ldi YL, lo8(SRAM_ADDITIONAL) + ld t0, Y+ + ld t1, Y+ + bst t0, 0 + ror t1 + ror t0 + bld t1, 7 + + ld x0, X+ + eor x0, t0 + st Z+, x0 + dec cnt0 + breq XOR_to_Cipher_END + ld x0, X+ + eor x0, t1 + st Z+, x0 + +XOR_to_Cipher_END: + tst ed + brne XOR_to_Cipher_dec + +XOR_to_Cipher_enc: + movw XL, addr0 + cp t2, rate + brsh XOR_to_Cipher_XOR_to_State + mov cnt0, t2 + rcall PAD_OneZero + rjmp XOR_to_Cipher_XOR_to_State + +XOR_to_Cipher_dec: + movw XL, addr2 + cp t2, rate + brsh XOR_to_Cipher_XOR_to_State + ; need to be padded + mov cnt0, t2 + rcall PAD_OneZero + +XOR_to_Cipher_XOR_to_State: + rcall XOR_to_State + + clr rmp + add addr0, t2 + adc addr1, rmp + add addr2, t2 + adc addr3, rmp +ret +.ENDIF + +ENC: + lds ZH, SRAM_MESSAGE_OUT_ADDR + lds ZL, SRAM_MESSAGE_OUT_ADDR + 1 + lds XH, SRAM_MESSAGE_IN_ADDR + lds XL, SRAM_MESSAGE_IN_ADDR + 1 + + movw addr0, XL + movw addr2, ZL + +enc_block_loop: + rcall PHOTON_Permutation + rcall XOR_to_Cipher + cpi mclen, 1 + brsh enc_block_loop + + mov domain_cnt, domain_cnt1 + rcall AddDomainCounter +ret + +AUTH_AND_ENCDEC: + ldi YH, hi8(SRAM_STATE) + ldi YL, lo8(SRAM_STATE) + + lds XH, SRAM_NONCE_ADDR + lds XL, SRAM_NONCE_ADDR + 1 + rcall Load_Reorder_Store_128_bits + lds XH, SRAM_KEY_ADDR + lds XL, SRAM_KEY_ADDR + 1 + rcall Load_Reorder_Store_128_bits + + ldi domain_cnt0, 1 + ldi domain_cnt1, 1 + +test_adlen_zero: + tst radlen + breq adlen_zero_test_mlen_zero + + ; radlen != 0 +adlen_nzero_test_mlen_zero: + tst mclen + brne test_adlen_divisible + ldi domain_cnt0, 3 +test_adlen_divisible: + mov rmp, radlen + andi rmp, RATE_INBYTES_MASK + breq hash_ad + inc domain_cnt0 ; 2 or 4 +hash_ad: + lds XH, SRAM_ASSOCIATED_DATA_ADDR + lds XL, SRAM_ASSOCIATED_DATA_ADDR + 1 + rcall HASH + tst mclen + breq mlen_zero_inputout_address + rjmp test_mlen_divisible + +adlen_zero_test_mlen_zero: + ldi domain_cnt1, 5 + tst mclen + breq adlen_zero_mlen_zero + + ; mclen != 0 +test_mlen_divisible: + mov rmp, mclen + andi rmp, RATE_INBYTES_MASK + breq enc_dec_m + inc domain_cnt1 ; 2 or 6 +enc_dec_m: + rcall ENC + rjmp AUTH_AND_ENCDEC_end + +adlen_zero_mlen_zero: + ; empty message and empty associated data + ldi YH, hi8(SRAM_STATE + STATE_INBYTES - 3) + ldi YL, lo8(SRAM_STATE + STATE_INBYTES - 3) + ld x0, Y + ldi rmp, 0x80 + eor x0, rmp + st Y, x0 + +mlen_zero_inputout_address: + tst ed + brne dec_inputout_address +enc_inputout_address: + lds ZH, SRAM_MESSAGE_OUT_ADDR + lds ZL, SRAM_MESSAGE_OUT_ADDR + 1 + movw addr2, ZL + rjmp AUTH_AND_ENCDEC_end +dec_inputout_address: + lds ZH, SRAM_MESSAGE_IN_ADDR + lds ZL, SRAM_MESSAGE_IN_ADDR + 1 + movw addr0, ZL + +AUTH_AND_ENCDEC_end: +ret + + +; void crypto_aead_encrypt_asm( +; unsigned char *c, +; const unsigned char *m, +; unsigned long long mlen, +; const unsigned char *ad, +; unsigned long long radlen, +; const unsigned char *npub, +; const unsigned char *k +; ) +; +; unsigned char *c, is passed in r24:r25 +; const unsigned char *m, is passed in r22:r23 +; unsigned long long mlen, is passed in r20:r21, only LSB (r20) is used +; const unsigned char *ad, is passed in r18:r19 +; unsigned long long radlen, is passed in r16:r17, only LSB (r16) is used +; const unsigned char *npub, is passed in r14:r15 +; const unsigned char *k is passed in r12:r13 +.global crypto_aead_encrypt_asm 
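+; Note that only the least-significant byte of each 64-bit length argument
+; is consumed (r20 for mclen/clen, r16 for radlen), so this implementation
+; handles inputs of at most 255 bytes per buffer.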
+crypto_aead_encrypt_asm:
+ PUSH_ALL
+ ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
+ ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
+ st X+, r25 ;store cipher address in SRAM_MESSAGE_OUT_ADDR
+ st X+, r24
+ st X+, r23 ;store message address in SRAM_MESSAGE_IN_ADDR
+ st X+, r22
+ st X+, r21 ;store message length in SRAM_MESSAGE_IN_LEN
+ st X+, r20
+ st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR
+ st X+, r18
+ st X+, r17 ;store associated data length in SRAM_ADLEN
+ st X+, r16
+ st X+, r15 ;store nonce address in SRAM_NONCE_ADDR
+ st X+, r14
+ st X+, r13 ;store key address in SRAM_KEY_ADDR
+ st X+, r12
+ mov mclen, r20
+ mov radlen, r16
+
+ ldi rate, RATE_INBYTES
+ clr ed
+
+ rcall AUTH_AND_ENCDEC
+ rcall TAG
+
+ POP_ALL
+ret
+
+; int crypto_aead_decrypt_asm(
+; unsigned char *m,
+; const unsigned char *c,
+; unsigned long long clen,
+; const unsigned char *ad,
+; unsigned long long radlen,
+; const unsigned char *npub,
+; const unsigned char *k
+; )
+;
+; unsigned char *m, is passed in r24:r25
+; const unsigned char *c, is passed in r22:r23
+; unsigned long long clen, is passed in r20:r21, only LSB (r20) is used
+; const unsigned char *ad, is passed in r18:r19
+; unsigned long long radlen, is passed in r16:r17, only LSB (r16) is used
+; const unsigned char *npub, is passed in r14:r15
+; const unsigned char *k is passed in r12:r13
+.global crypto_aead_decrypt_asm
+crypto_aead_decrypt_asm:
+ PUSH_ALL
+ ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
+ ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
+ st X+, r25 ;store message address in SRAM_MESSAGE_OUT_ADDR
+ st X+, r24
+ st X+, r23 ;store cipher address in SRAM_MESSAGE_IN_ADDR
+ st X+, r22
+ st X+, r21 ;store cipher length in SRAM_MESSAGE_IN_LEN
+ st X+, r20
+ st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR
+ st X+, r18
+ st X+, r17 ;store associated data length in SRAM_ADLEN
+ st X+, r16
+ st X+, r15 ;store nonce address in SRAM_NONCE_ADDR
+ st X+, r14
+ st X+, r13 ;store key address in SRAM_KEY_ADDR
+ st X+, r12
+ mov mclen, r20
+ mov radlen, r16
+
+ ldi rate, RATE_INBYTES
+ clr ed
+ inc ed
+
+ rcall AUTH_AND_ENCDEC
+
+ ldi YH, hi8(SRAM_ADDITIONAL)
+ ldi YL, lo8(SRAM_ADDITIONAL)
+ movw addr2, YL
+ rcall TAG
+
+ sbiw YL, CRYPTO_ABYTES
+ movw XL, addr0
+
+ ldi cnt0, CRYPTO_ABYTES
+compare_tag:
+ ld t0, Y+
+ ld x0, X+
+ cp t0, x0
+ brne return_tag_not_match
+ dec cnt0
+ brne compare_tag
+ rjmp return_tag_match
+
+return_tag_not_match:
+ ldi r25, 0xFF
+ ldi r24, 0xFF
+ rjmp crypto_aead_decrypt_end
+return_tag_match:
+ clr r25
+ clr r24
+
+crypto_aead_decrypt_end:
+ POP_ALL
+ret
+
+; #ifdef CRYPTO_AEAD
+#endif
+
+#ifdef CRYPTO_HASH
+; void crypto_hash_asm(
+; unsigned char *out,
+; const unsigned char *in,
+; unsigned long long inlen
+; )
+;
+; unsigned char *out, is passed in r24:r25
+; const unsigned char *in, is passed in r22:r23
+; unsigned long long inlen, is passed in r20:r21, only LSB (r20) is used
+.global crypto_hash_asm
+crypto_hash_asm:
+ PUSH_ALL
+ ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
+ ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
+ st X+, r25 ;store digest output address in SRAM_MESSAGE_OUT_ADDR
+ st X+, r24
+ st X+, r23 ;store message input address in SRAM_MESSAGE_IN_ADDR
+ st X+, r22
+ st X+, r21 ;store message length in SRAM_MESSAGE_IN_LEN
+ st X+, r20
+ mov mclen, r20
+
+ ; empty half state
+ ldi YH, hi8(SRAM_STATE + INITIAL_RATE_INBYTES)
+ ldi YL, lo8(SRAM_STATE + INITIAL_RATE_INBYTES)
+ clr rmp
+ ldi cnt1, (STATE_INBYTES - INITIAL_RATE_INBYTES)
+zero_state:
+ st Y+, rmp
+ dec cnt1
+ brne zero_state
+
+ ldi domain_cnt0, 1
+ sbiw YL, STATE_INBYTES
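+ ; Y is back at SRAM_STATE: the first message block is absorbed into
+ ; the initial 128-bit rate at the start of the state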
+ lds XH, SRAM_MESSAGE_IN_ADDR
+ lds XL, SRAM_MESSAGE_IN_ADDR + 1
+
+ tst mclen
+ breq add_domain
+
+test_mlen_initrate:
+ ; mclen != 0
+ cpi mclen, INITIAL_RATE_INBYTES
+ brlo less_than_initial_rate
+ breq equal_to_initial_rate
+
+more_than_initial_rate:
+ rcall Load_Reorder_Store_128_bits
+ ldi rate, HASH_RATE_INBYTES
+ mov radlen, mclen
+ subi radlen, INITIAL_RATE_INBYTES
+ mov rmp, radlen
+ andi rmp, HASH_RATE_INBYTES_MASK
+ breq hash_message
+ inc domain_cnt0
+hash_message:
+ rcall HASH
+ rjmp gen_digest
+
+equal_to_initial_rate:
+ inc domain_cnt0
+ rcall Load_Reorder_Store_128_bits
+ rjmp add_domain
+
+less_than_initial_rate:
+ mov cnt0, mclen
+ ldi rate, INITIAL_RATE_INBYTES
+ rcall PAD_OneZero
+ ldi YH, hi8(SRAM_STATE)
+ ldi YL, lo8(SRAM_STATE)
+ rcall Load_Reorder_Store_128_bits
+ rjmp add_domain
+
+add_domain:
+ mov domain_cnt, domain_cnt0
+ rcall AddDomainCounter
+gen_digest:
+ lds XH, SRAM_MESSAGE_OUT_ADDR
+ lds XL, SRAM_MESSAGE_OUT_ADDR + 1
+ movw addr2, XL
+ rcall TAG
+ movw XL, addr2
+ adiw XL, SQUEEZE_RATE_INBYTES
+ movw addr2, XL
+ rcall TAG
+
+ POP_ALL
+ret
+
+#endif
\ No newline at end of file
diff --git a/photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/avr8_speed/hash.c b/photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/avr8_speed/hash.c
new file mode 100644
index 0000000..dbbe4df
--- /dev/null
+++ b/photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/avr8_speed/hash.c
@@ -0,0 +1,32 @@
+#include <stdio.h> /* angle-bracket header names were lost in transit; standard headers assumed */
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include "api.h"
+#include "crypto_hash.h"
+
+extern void crypto_hash_asm(
+ unsigned char *out,
+ const unsigned char *in,
+ unsigned char inlen
+ );
+
+int crypto_hash(
+ unsigned char *out,
+ const unsigned char *in,
+ unsigned long long inlen
+)
+{
+ /*
+ ...
+ ... the code for the hash function implementation goes here
+ ... generating a hash value out[0],out[1],...,out[CRYPTO_BYTES-1]
+ ... from a message in[0],in[1],...,in[inlen-1]
+ ...
+ ...
return 0; + */ + + crypto_hash_asm(out, in, inlen); + + return 0; +} \ No newline at end of file diff --git a/photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/avr8_speed/photon.h b/photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/avr8_speed/photon.h new file mode 100644 index 0000000..d764e16 --- /dev/null +++ b/photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/avr8_speed/photon.h @@ -0,0 +1,714 @@ +; +; ********************************************** +; * PHOTON-Beetle * +; * Authenticated Encryption and Hash Family * +; * * +; * Assembly implementation for 8-bit AVR CPU * +; * Version 1.1 2020 by PHOTON-Beetle Team * +; ********************************************** +; +#define ROUND_N 12 +#define DIM 8 + +.MACRO Store_OneRow + st X+, x0 + st X+, x1 + st X+, x2 + st X+, x3 +.ENDM + +.MACRO ROTL_1 i0 + bst \i0, 7 + lsl \i0 + bld \i0, 0 +.ENDM + +.MACRO ROTR_1 i0 + bst \i0, 0 + lsr \i0 + bld \i0, 7 +.ENDM + +.MACRO ROTR_4 i0 + swap \i0 +.ENDM + +.MACRO ROTR_1_ROW + ROTR_1 x0 + ROTR_1 x1 + ROTR_1 x2 + ROTR_1 x3 +.ENDM + +.MACRO ROTL_1_ROW + ROTL_1 x0 + ROTL_1 x1 + ROTL_1 x2 + ROTL_1 x3 +.ENDM + +.MACRO ROTR_4_ROW + ROTR_4 x0 + ROTR_4 x1 + ROTR_4 x2 + ROTR_4 x3 +.ENDM + + +; For all mul2_GF16_0x13_xor: +; Input +; MSB........LSB +; x0=@0: x1=@1: x2=@2: x3=@3 + ; # define mul2_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \ + ; x3 = XOR (x3 ,x0); \ + ; } while (0) ; /* Output : ( MSB ) x1 ,x2 ,x3 , x0 ( LSB ) */ +.MACRO mul2_GF16_0x13_xor + ld t3, X+ + ld t2, X+ + ld t1, X+ + ld t0, X+ + eor t3, t0 + eor x0, t0 + eor x1, t3 + eor x2, t2 + eor x3, t1 +.ENDM + + ; # define mul4_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \ + ; x3 = XOR (x3 ,x0); x0 = XOR (x0 ,x1); \ + ; } while (0) ; /* Output : ( MSB ) x2 ,x3 ,x0 , x1 ( LSB ) */ +.MACRO mul4_GF16_0x13_xor + ld t3, X+ + ld t2, X+ + ld t1, X+ + ld t0, X+ + eor t3, t0 + eor t0, t1 + eor x0, t1 + eor x1, t0 + eor x2, t3 + eor x3, t2 +.ENDM + + ; # define mul5_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \ + ; x2 = XOR (x2 ,x0); x3 = XOR (x3 ,x1); \ + ; x1 = XOR (x1 ,x2); x0 = XOR (x0 ,x3); \ + ; } while (0) ; /* Output : ( MSB ) x2 ,x0 ,x1 , x3 ( LSB ) */ +.MACRO mul5_GF16_0x13_xor + ld t3, X+ + ld t2, X+ + ld t1, X+ + ld t0, X+ + eor t2, t0 + eor t3, t1 + eor t1, t2 + eor t0, t3 + eor x0, t3 + eor x1, t1 + eor x2, t0 + eor x3, t2 +.ENDM + + ; # define mul6_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \ + ; x3 = XOR (x3 ,x1); x1 = XOR (x1 ,x0); \ + ; x2 = XOR (x2 ,x1); x0 = XOR (x0 ,x2); \ + ; x2 = XOR (x2 ,x3); \ + ; } while (0) ; /* Output : ( MSB ) x0 ,x2 ,x3 , x1 ( LSB ) */ +.MACRO mul6_GF16_0x13_xor + ld t3, X+ + ld t2, X+ + ld t1, X+ + ld t0, X+ + eor t3, t1 + eor t1, t0 + eor t2, t1 + eor t0, t2 + eor t2, t3 + eor x0, t1 + eor x1, t3 + eor x2, t2 + eor x3, t0 +.ENDM + + ; # define mul8_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \ + ; x3 = XOR (x3 ,x0); x0 = XOR (x0 ,x1); \ + ; x1 = XOR (x1 ,x2); \ + ; } while (0) ; /* Output : ( MSB ) x3 ,x0 ,x1 , x2 ( LSB ) */ +.MACRO mul8_GF16_0x13_xor + ld t3, X+ + ld t2, X+ + ld t1, X+ + ld t0, X+ + eor t3, t0 + eor t0, t1 + eor t1, t2 + eor x0, t2 + eor x1, t1 + eor x2, t0 + eor x3, t3 +.ENDM + + ; # define mul11_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \ + ; x2 = XOR (x2 ,x0); x1 = XOR (x1 ,x3); \ + ; x0 = XOR (x0 ,x1); x3 = XOR (x3 ,x2); \ + ; } while (0) ; /* Output : ( MSB ) x1 ,x2 ,x0 , x3 ( LSB ) */ +.MACRO mulb_GF16_0x13_xor + ld t3, X+ + ld t2, X+ + ld t1, X+ + ld t0, X+ + eor t2, t0 + eor t1, t3 + eor t0, t1 + eor t3, t2 + eor x0, t3 + eor x1, t0 + eor x2, t2 + eor x3, t1 +.ENDM + + +.MACRO RoundFunction 
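+; One round of the PHOTON-256 permutation on the bitsliced state in
+; SRAM_STATE: AddConstant, SubCells (the bitsliced PRESENT S-box sequence)
+; and ShiftRows are fused and applied two rows per round-constant byte
+; loaded with lpm, each row receiving its cell rotation just before
+; Store_OneRow writes it back; the serial MixColumns follows at MC_Start.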
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Start AddRC_Sbox_ShiftRow +AddRC_Sbox_ShiftRow_Start: + clr t3 + inc t3 + + ldi XL, lo8(SRAM_STATE) + ldi XH, hi8(SRAM_STATE) + + lpm t0, Z+ ; Load two nibbles of round constant for row 0, 1 + ; AddRC_TwoRows + ld x0, X+ + ld x1, X+ + ld x2, X+ + ld x3, X+ + sbiw XL, 4 + + ror t0 + brcc row0_next1 + eor x0, t3 +row0_next1: + ror t0 + brcc row0_next2 + eor x1, t3 +row0_next2: + ror t0 + brcc row0_next3 + eor x2, t3 +row0_next3: + ror t0 + brcc row0_next4 + eor x3, t3 +row0_next4: + ; Sbox_TwoRows + eor x1, x2 + mov t1, x2 + and t1, x1 + eor x3, t1 + mov t1, x3 + and x3, x1 + eor x3, x2 + mov t2, x3 + eor x3, x0 + com x3 + mov x2, x3 + or t2, x0 + eor x0, t1 + eor x1, x0 + or x2, x1 + eor x2, t1 + eor x1, t2 + eor x3, x1 + Store_OneRow + + ; AddRC_TwoRows + ld x0, X+ + ld x1, X+ + ld x2, X+ + ld x3, X+ + sbiw XL, 4 + + ror t0 + brcc row1_next1 + eor x0, t3 +row1_next1: + ror t0 + brcc row1_next2 + eor x1, t3 +row1_next2: + ror t0 + brcc row1_next3 + eor x2, t3 +row1_next3: + ror t0 + brcc row1_next4 + eor x3, t3 +row1_next4: + ; Sbox_TwoRows + eor x1, x2 + mov t1, x2 + and t1, x1 + eor x3, t1 + mov t1, x3 + and x3, x1 + eor x3, x2 + mov t2, x3 + eor x3, x0 + com x3 + mov x2, x3 + or t2, x0 + eor x0, t1 + eor x1, x0 + or x2, x1 + eor x2, t1 + eor x1, t2 + eor x3, x1 + + ROTR_1_ROW + Store_OneRow + + lpm t0, Z+ ; Load two nibbles of round constant for row 2i, 2i+1 + ; AddRC_TwoRows + ld x0, X+ + ld x1, X+ + ld x2, X+ + ld x3, X+ + sbiw XL, 4 + + ror t0 + brcc row2_next1 + eor x0, t3 +row2_next1: + ror t0 + brcc row2_next2 + eor x1, t3 +row2_next2: + ror t0 + brcc row2_next3 + eor x2, t3 +row2_next3: + ror t0 + brcc row2_next4 + eor x3, t3 +row2_next4: + ; Sbox_TwoRows + eor x1, x2 + mov t1, x2 + and t1, x1 + eor x3, t1 + mov t1, x3 + and x3, x1 + eor x3, x2 + mov t2, x3 + eor x3, x0 + com x3 + mov x2, x3 + or t2, x0 + eor x0, t1 + eor x1, x0 + or x2, x1 + eor x2, t1 + eor x1, t2 + eor x3, x1 + + ROTR_1_ROW + ROTR_1_ROW + Store_OneRow + + ; AddRC_TwoRows + ld x0, X+ + ld x1, X+ + ld x2, X+ + ld x3, X+ + sbiw XL, 4 + + ror t0 + brcc row3_next1 + eor x0, t3 +row3_next1: + ror t0 + brcc row3_next2 + eor x1, t3 +row3_next2: + ror t0 + brcc row3_next3 + eor x2, t3 +row3_next3: + ror t0 + brcc row3_next4 + eor x3, t3 +row3_next4: + ; Sbox_TwoRows + eor x1, x2 + mov t1, x2 + and t1, x1 + eor x3, t1 + mov t1, x3 + and x3, x1 + eor x3, x2 + mov t2, x3 + eor x3, x0 + com x3 + mov x2, x3 + or t2, x0 + eor x0, t1 + eor x1, x0 + or x2, x1 + eor x2, t1 + eor x1, t2 + eor x3, x1 + + ROTR_4_ROW + ROTL_1_ROW + Store_OneRow + + lpm t0, Z+ ; Load two nibbles of round constant for row 2i, 2i+1 + ; AddRC_TwoRows + ld x0, X+ + ld x1, X+ + ld x2, X+ + ld x3, X+ + sbiw XL, 4 + + ror t0 + brcc row4_next1 + eor x0, t3 +row4_next1: + ror t0 + brcc row4_next2 + eor x1, t3 +row4_next2: + ror t0 + brcc row4_next3 + eor x2, t3 +row4_next3: + ror t0 + brcc row4_next4 + eor x3, t3 +row4_next4: + ; Sbox_TwoRows + eor x1, x2 + mov t1, x2 + and t1, x1 + eor x3, t1 + mov t1, x3 + and x3, x1 + eor x3, x2 + mov t2, x3 + eor x3, x0 + com x3 + mov x2, x3 + or t2, x0 + eor x0, t1 + eor x1, x0 + or x2, x1 + eor x2, t1 + eor x1, t2 + eor x3, x1 + + ROTR_4_ROW + Store_OneRow + + ; AddRC_TwoRows + ld x0, X+ + ld x1, X+ + ld x2, X+ + ld x3, X+ + sbiw XL, 4 + + ror t0 + brcc row5_next1 + eor x0, t3 +row5_next1: + ror t0 + brcc row5_next2 + eor x1, t3 +row5_next2: + ror t0 + brcc row5_next3 + eor x2, t3 +row5_next3: + ror t0 + brcc row5_next4 + eor x3, t3 +row5_next4: + ; Sbox_TwoRows + eor x1, x2 + mov t1, 
x2 + and t1, x1 + eor x3, t1 + mov t1, x3 + and x3, x1 + eor x3, x2 + mov t2, x3 + eor x3, x0 + com x3 + mov x2, x3 + or t2, x0 + eor x0, t1 + eor x1, x0 + or x2, x1 + eor x2, t1 + eor x1, t2 + eor x3, x1 + + ROTR_4_ROW + ROTR_1_ROW + Store_OneRow + + lpm t0, Z+ ; Load two nibbles of round constant for row 2i, 2i+1 + ; AddRC_TwoRows + ld x0, X+ + ld x1, X+ + ld x2, X+ + ld x3, X+ + sbiw XL, 4 + + ror t0 + brcc row6_next1 + eor x0, t3 +row6_next1: + ror t0 + brcc row6_next2 + eor x1, t3 +row6_next2: + ror t0 + brcc row6_next3 + eor x2, t3 +row6_next3: + ror t0 + brcc row6_next4 + eor x3, t3 +row6_next4: + ; Sbox_TwoRows + eor x1, x2 + mov t1, x2 + and t1, x1 + eor x3, t1 + mov t1, x3 + and x3, x1 + eor x3, x2 + mov t2, x3 + eor x3, x0 + com x3 + mov x2, x3 + or t2, x0 + eor x0, t1 + eor x1, x0 + or x2, x1 + eor x2, t1 + eor x1, t2 + eor x3, x1 + + ROTL_1_ROW + ROTL_1_ROW + Store_OneRow + + ; AddRC_TwoRows + ld x0, X+ + ld x1, X+ + ld x2, X+ + ld x3, X+ + sbiw XL, 4 + + ror t0 + brcc row7_next1 + eor x0, t3 +row7_next1: + ror t0 + brcc row7_next2 + eor x1, t3 +row7_next2: + ror t0 + brcc row7_next3 + eor x2, t3 +row7_next3: + ror t0 + brcc row7_next4 + eor x3, t3 +row7_next4: + ; Sbox_TwoRows + eor x1, x2 + mov t1, x2 + and t1, x1 + eor x3, t1 + mov t1, x3 + and x3, x1 + eor x3, x2 + mov t2, x3 + eor x3, x0 + com x3 + mov x2, x3 + or t2, x0 + eor x0, t1 + eor x1, x0 + or x2, x1 + eor x2, t1 + eor x1, t2 + eor x3, x1 + + ROTL_1_ROW + Store_OneRow + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Start MixColumns +MC_Start: + + ldi XH, hi8(SRAM_STATE) + ldi XL, lo8(SRAM_STATE) + movw YL, XL +A0: + clr x0 + clr x1 + clr x2 + clr x3 + mul2_GF16_0x13_xor + mul4_GF16_0x13_xor + mul2_GF16_0x13_xor + mulb_GF16_0x13_xor + mul2_GF16_0x13_xor + mul8_GF16_0x13_xor + mul5_GF16_0x13_xor + mul6_GF16_0x13_xor + movw XL, YL + Store_OneRow + +A1: + clr x0 + clr x1 + clr x2 + clr x3 + mul2_GF16_0x13_xor + mul4_GF16_0x13_xor + mul2_GF16_0x13_xor + mulb_GF16_0x13_xor + mul2_GF16_0x13_xor + mul8_GF16_0x13_xor + mul5_GF16_0x13_xor + movw XL, YL + mul6_GF16_0x13_xor + Store_OneRow + +A2: + clr x0 + clr x1 + clr x2 + clr x3 + mul2_GF16_0x13_xor + mul4_GF16_0x13_xor + mul2_GF16_0x13_xor + mulb_GF16_0x13_xor + mul2_GF16_0x13_xor + mul8_GF16_0x13_xor + movw XL, YL + mul5_GF16_0x13_xor + mul6_GF16_0x13_xor + Store_OneRow + +A3: + clr x0 + clr x1 + clr x2 + clr x3 + mul2_GF16_0x13_xor + mul4_GF16_0x13_xor + mul2_GF16_0x13_xor + mulb_GF16_0x13_xor + mul2_GF16_0x13_xor + movw XL, YL + mul8_GF16_0x13_xor + mul5_GF16_0x13_xor + mul6_GF16_0x13_xor + Store_OneRow + +A4: + clr x0 + clr x1 + clr x2 + clr x3 + mul2_GF16_0x13_xor + mul4_GF16_0x13_xor + mul2_GF16_0x13_xor + mulb_GF16_0x13_xor + movw XL, YL + mul2_GF16_0x13_xor + mul8_GF16_0x13_xor + mul5_GF16_0x13_xor + mul6_GF16_0x13_xor + Store_OneRow + +A5: + clr x0 + clr x1 + clr x2 + clr x3 + mul2_GF16_0x13_xor + mul4_GF16_0x13_xor + mul2_GF16_0x13_xor + movw XL, YL + mulb_GF16_0x13_xor + mul2_GF16_0x13_xor + mul8_GF16_0x13_xor + mul5_GF16_0x13_xor + mul6_GF16_0x13_xor + Store_OneRow + +A6: + clr x0 + clr x1 + clr x2 + clr x3 + mul2_GF16_0x13_xor + mul4_GF16_0x13_xor + movw XL, YL + mul2_GF16_0x13_xor + mulb_GF16_0x13_xor + mul2_GF16_0x13_xor + mul8_GF16_0x13_xor + mul5_GF16_0x13_xor + mul6_GF16_0x13_xor + Store_OneRow + +A7: + clr x0 + clr x1 + clr x2 + clr x3 + mul2_GF16_0x13_xor + movw XL, YL + mul4_GF16_0x13_xor + mul2_GF16_0x13_xor + mulb_GF16_0x13_xor + mul2_GF16_0x13_xor + mul8_GF16_0x13_xor + mul5_GF16_0x13_xor + mul6_GF16_0x13_xor + Store_OneRow +.ENDM + +PHOTON_Permutation: + ldi 
ZH, hi8(RC) + ldi ZL, lo8(RC) + ldi cnt2, ROUND_N +round_loop_start: + RoundFunction + dec cnt2 + breq round_loop_end + jmp round_loop_start +round_loop_end: +ret + +.section .text +RC: +.byte 0x01,0x62,0xFE,0x9D +.byte 0x23,0x40,0xDC,0xBF +.byte 0x67,0x04,0x98,0xFB +.byte 0xFE,0x9D,0x01,0x62 +.byte 0xCD,0xAE,0x32,0x51 +.byte 0xAB,0xC8,0x54,0x37 +.byte 0x76,0x15,0x89,0xEA +.byte 0xDC,0xBF,0x23,0x40 +.byte 0x89,0xEA,0x76,0x15 +.byte 0x32,0x51,0xCD,0xAE +.byte 0x45,0x26,0xBA,0xD9 +.byte 0xBA,0xD9,0x45,0x26 -- libgit2 0.26.0
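As a reading aid for the copies of encrypt_core.S above, the padding and domain-separation steps implemented by PAD_OneZero and AddDomainCounter can be modelled in a few lines of C. This is an illustrative sketch written against the #defines in encrypt_core.S (STATE_INBYTES = 32); it is not code from the submission itself:

#include <stdint.h>
#include <string.h>

#define STATE_INBYTES 32

/* PAD_OneZero: copy the len remaining input bytes (len < rate at every
   call site), append the 0x01 marker, then zero-fill up to the rate. */
static void pad_one_zero(uint8_t *pad, const uint8_t *src,
                         unsigned len, unsigned rate)
{
    memcpy(pad, src, len);
    pad[len] = 0x01;
    memset(pad + len + 1, 0x00, rate - len - 1);
}

/* AddDomainCounter: bit i of the 3-bit domain constant is XORed as 0x80
   into byte STATE_INBYTES - 3 + i of the bitsliced state. */
static void add_domain_counter(uint8_t state[STATE_INBYTES], uint8_t domain)
{
    for (int i = 0; i < 3; i++)
        if ((domain >> i) & 1)
            state[STATE_INBYTES - 3 + i] ^= 0x80;
}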