avr8_lowrom for photon-beetle

4f2227ae · 包珍珍 · Enrico Pozzobon · 7860b7c6 · 4f2227ae · 4f2227ae
Commit 4f2227ae authored Jul 25, 2020 by 包珍珍 Committed by Enrico Pozzobon Jul 25, 2020
18 changed files
--- a/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/avr8_lowrom/api.h
+++ b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/avr8_lowrom/api.h
+/* Parameter sizes in bytes. Of these, only CRYPTO_ABYTES is referenced by
+ * encrypt.c in this directory, where it is the number of bytes by which the
+ * ciphertext is longer than the message. */
+#define CRYPTO_KEYBYTES     16
+#define CRYPTO_NSECBYTES    0
+#define CRYPTO_NPUBBYTES    16
+#define CRYPTO_ABYTES       16
+/* NOTE(review): presumably declares that input and output buffers must not
+ * overlap - confirm against the API specification this header follows. */
+#define CRYPTO_NOOVERLAP    1
--- a/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/avr8_lowrom/assist.h
+++ b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/avr8_lowrom/assist.h
+;
+; **********************************************
+; * PHOTON-Beetle                              *
+; * Authenticated Encryption and Hash Family   *
+; *                                            *
+; * Assembly implementation for 8-bit AVR CPU  *
+; * Version 1.0 2020 by PHOTON-Beetle Team     *
+; **********************************************
+;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Bitslice
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Reorder_8_bits i0, i1, i2, i3, i4
+; Distributes the eight bits of i0 over i1..i4, two bits each, using
+; rotate-through-carry: each ror on i0 moves its current lowest bit into the
+; carry flag and the following ror on iN rotates that carry into bit 7 of iN.
+; Bits 0..3 of i0 go to i1..i4 in turn, then bits 4..7 do the same, so iN
+; receives bit (N-1) of the low nibble and then of the high nibble of i0.
+; i0 is consumed: it ends up holding the carry on entry followed by the bits
+; rotated out of i1..i4.
+.MACRO Reorder_8_bits i0, i1, i2, i3, i4
+    ror \i0
+    ror \i1
+    ror \i0
+    ror \i2
+    ror \i0
+    ror \i3
+    ror \i0
+    ror \i4 
+    ror \i0
+    ror \i1
+    ror \i0
+    ror \i2
+    ror \i0
+    ror \i3
+    ror \i0
+    ror \i4 
+.ENDM
+
+; InvReorder_8_bits i0, i1, i2, i3, i4
+; Counterpart of Reorder_8_bits: gathers one output byte in i0 by rotating the
+; lowest bit of i1, i2, i3, i4 (in that order, twice over) into the carry flag
+; and from there into bit 7 of i0. After the sixteen rotations the bit taken
+; first sits in bit 0 of i0 and the bit taken last in bit 7.
+; i1..i4 are each rotated right by two positions through carry, so their next
+; two bits are ready for the following invocation.
+.MACRO InvReorder_8_bits i0, i1, i2, i3, i4
+    ror \i1
+    ror \i0
+    ror \i2
+    ror \i0
+    ror \i3
+    ror \i0
+    ror \i4 
+    ror \i0
+    ror \i1
+    ror \i0
+    ror \i2
+    ror \i0
+    ror \i3
+    ror \i0
+    ror \i4 
+    ror \i0
+.ENDM
+
+; Load_Reorder_32_bits
+; Reads four bytes through X (post-increment) and passes each one through
+; Reorder_8_bits, so that x0..x3 each receive eight new bits in total
+; (two per input byte).
+; require XH:XL be the address of the input
+; Uses cnt1 as the loop counter and rmp as the byte being scattered.
+Load_Reorder_32_bits:
+    ldi cnt1, 4
+reorder_8_bits_loop:
+    ld rmp, X+
+    Reorder_8_bits rmp, x0, x1, x2, x3
+    dec cnt1
+    brne reorder_8_bits_loop
+ret
+
+; invReorder_Store_32_bits
+; Rebuilds four bytes from x0..x3 with InvReorder_8_bits and writes them
+; through Y (post-increment). x0..x3 are rotated as a side effect of the macro.
+; require YH:YL be the address of the output
+; Uses cnt1 as the loop counter and rmp as the byte being assembled.
+invReorder_Store_32_bits:
+    ldi cnt1, 4
+invreorder_8_bits_loop:
+    InvReorder_8_bits rmp, x0, x1, x2, x3
+    st Y+, rmp
+    dec cnt1
+    brne invreorder_8_bits_loop
+ret
+
+; Load_Reorder_Store_128_bits
+; Converts 16 input bytes to the bit-reordered form: four times, it calls
+; Load_Reorder_32_bits (4 bytes in through X) and stores the resulting
+; x0, x1, x2, x3 through Y (4 bytes out). Both pointers advance by 16.
+; require XH:XL be the address of the input
+; require YH:YL be the address of the output
+; Uses cnt0 as the outer counter; the callee uses cnt1 and rmp.
+Load_Reorder_Store_128_bits:
+    ldi cnt0, 4
+reorder_32_bits_loop:
+    rcall Load_Reorder_32_bits
+    st Y+, x0
+    st Y+, x1
+    st Y+, x2
+    st Y+, x3
+    dec cnt0
+    brne reorder_32_bits_loop
+ret
+
+; Load_invReorder_Store_128_bits
+; Converts 16 bytes from the bit-reordered form back to plain bytes: four
+; times, it loads x0..x3 through X and calls invReorder_Store_32_bits, which
+; writes 4 bytes through Y. Both pointers advance by 16.
+; require XH:XL be the address of the input
+; require YH:YL be the address of the output
+; Uses cnt0 as the outer counter; the callee uses cnt1 and rmp.
+Load_invReorder_Store_128_bits:
+    ldi cnt0, 4
+invreorder_32_bits_loop:
+    ld x0, X+
+    ld x1, X+
+    ld x2, X+
+    ld x3, X+
+    rcall invReorder_Store_32_bits
+    dec cnt0
+    brne invreorder_32_bits_loop
+ret
+
+; PUSH_ALL: push r2..r17 and r28..r29 onto the stack, in ascending order.
+; POP_ALL is the matching restore and must be used in pairs with this macro.
+.macro PUSH_ALL
+.irp gpr, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15, r16, r17, r28, r29
+push    \gpr
+.endr
+.endm
+
+; POP_ALL: pop r29..r28 and r17..r2, the exact reverse of PUSH_ALL, then
+; clear r1 (NOTE(review): presumably because compiled C code expects r1 to
+; hold zero - confirm against the avr-gcc register conventions).
+.macro POP_ALL
+.irp gpr, r29, r28, r17, r16, r15, r14, r13, r12, r11, r10, r9, r8, r7, r6, r5, r4, r3, r2
+pop    \gpr
+.endr
+clr    r1
+.endm
--- a/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/avr8_lowrom/crypto_aead.h
+++ b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/avr8_lowrom/crypto_aead.h
+/* Public AEAD entry points. The extern "C" guard gives C++ callers C linkage. */
+#ifdef __cplusplus
+extern "C" {
+#endif
+    
+/* Encrypts m (mlen bytes) with associated data ad (adlen bytes), nonce npub
+ * and key k; writes the ciphertext to c and its length to *clen.
+ * nsec is accepted for API compatibility; encrypt.c in this directory
+ * ignores it. */
+int crypto_aead_encrypt(
+	unsigned char *c,unsigned long long *clen,
+	const unsigned char *m,unsigned long long mlen,
+	const unsigned char *ad,unsigned long long adlen,
+	const unsigned char *nsec,
+	const unsigned char *npub,
+	const unsigned char *k
+	); 
+
+
+/* Verifies and decrypts c (clen bytes); writes the plaintext to m and its
+ * length to *outputmlen. encrypt.c in this directory returns 0 on success and
+ * -1 when clen is too short or verification fails; nsec is ignored there. */
+int crypto_aead_decrypt(
+	unsigned char *m,unsigned long long *outputmlen,
+	unsigned char *nsec,
+	const unsigned char *c,unsigned long long clen,
+	const unsigned char *ad,unsigned long long adlen,
+	const unsigned char *npub,
+	const unsigned char *k
+	); 
+
+#ifdef __cplusplus
+}
+#endif
--- a/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/avr8_lowrom/encrypt.c
+++ b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/avr8_lowrom/encrypt.c
+#include <avr/io.h>
+#include <avr/sfr_defs.h>
+#include <stdlib.h>
+#include <string.h>
+#include "api.h"
+
+/* Assembly entry points (implemented outside this file).
+ * Note that every length parameter is an unsigned char, so a single call can
+ * describe at most 255 bytes of message / associated data / hash input. */
+
+/* Encrypts m (mlen bytes) with ad (adlen bytes), npub and k into c. */
+extern void crypto_aead_encrypt_asm(
+      unsigned char *c,         
+      const unsigned char *m,   
+      unsigned char mlen,  
+      const unsigned char *ad,  
+      unsigned char adlen, 
+      const unsigned char *npub,
+      const unsigned char *k    
+      );
+
+/* Decrypts c into m. The caller in this file passes the ciphertext length
+ * without the tag as clen and treats a nonzero return value as an
+ * authentication failure. */
+extern char crypto_aead_decrypt_asm(
+     unsigned char *m,          
+     const unsigned char *c,    
+     unsigned char clen,   
+     const unsigned char *ad,   
+     unsigned char adlen,  
+     const unsigned char *npub, 
+     const unsigned char *k     
+     );
+
+/* Hash entry point; declared here but not called from this file. */
+extern void crypto_hash_asm(
+    unsigned char *out,
+    const unsigned char *in,
+    unsigned char inlen
+    );
+
+
+/*
+ * AEAD encryption wrapper around crypto_aead_encrypt_asm.
+ *
+ * c      output buffer; receives mlen + CRYPTO_ABYTES bytes
+ * clen   receives the ciphertext length (mlen + CRYPTO_ABYTES)
+ * m      plaintext, mlen bytes
+ * ad     associated data, adlen bytes
+ * nsec   unused
+ * npub   nonce
+ * k      key
+ *
+ * Returns 0 on success. Returns -1, without touching c or *clen, when mlen
+ * or adlen does not fit the 8-bit length parameters of the assembly core;
+ * previously such lengths were silently truncated, producing a ciphertext
+ * for only part of the input while *clen reported the full length.
+ */
+int crypto_aead_encrypt(
+    unsigned char *c, unsigned long long *clen,
+    const unsigned char *m, unsigned long long mlen,
+    const unsigned char *ad, unsigned long long adlen,
+    const unsigned char *nsec,
+    const unsigned char *npub,
+    const unsigned char *k
+    )
+{
+    (void)nsec;
+
+    /* A length survives the round trip through unsigned char only if it
+       fits the assembly interface. */
+    if (mlen != (unsigned char)mlen || adlen != (unsigned char)adlen) {
+        return -1;
+    }
+
+    crypto_aead_encrypt_asm(c, m, (unsigned char)mlen,
+                            ad, (unsigned char)adlen, npub, k);
+
+    *clen = mlen + CRYPTO_ABYTES;
+    return 0;
+}
+
+
+
+/*
+ * AEAD decryption wrapper around crypto_aead_decrypt_asm.
+ *
+ * m      output buffer; receives clen - CRYPTO_ABYTES bytes
+ * mlen   receives the plaintext length on success
+ * nsec   unused
+ * c      ciphertext followed by the tag, clen bytes in total
+ * ad     associated data, adlen bytes
+ * npub   nonce
+ * k      key
+ *
+ * Returns 0 on success and -1 when
+ *   - clen is shorter than the tag,
+ *   - the message or associated-data length does not fit the 8-bit length
+ *     parameters of the assembly core (previously silently truncated), or
+ *   - the assembly core reports an authentication failure, in which case
+ *     the plaintext buffer is zeroed before returning.
+ * *mlen is written only on success.
+ */
+int crypto_aead_decrypt(
+    unsigned char *m, unsigned long long *mlen,
+    unsigned char *nsec,
+    const unsigned char *c, unsigned long long clen,
+    const unsigned char *ad, unsigned long long adlen,
+    const unsigned char *npub,
+    const unsigned char *k
+    )
+{
+    unsigned long long mlen_;
+    char tag_mismatch;   /* nonzero means verification failed */
+
+    (void)nsec;
+    if (clen < CRYPTO_ABYTES) {
+        return -1;
+    }
+    mlen_ = clen - CRYPTO_ABYTES;
+
+    /* Reject lengths the 8-bit assembly interface cannot represent. */
+    if (mlen_ != (unsigned char)mlen_ || adlen != (unsigned char)adlen) {
+        return -1;
+    }
+
+    tag_mismatch = crypto_aead_decrypt_asm(m, c, (unsigned char)mlen_,
+                                           ad, (unsigned char)adlen, npub, k);
+
+    if (tag_mismatch != 0)
+    {
+        /* Do not hand unauthenticated plaintext back to the caller. */
+        memset(m, 0, (size_t)mlen_);
+        return -1;
+    }
+    *mlen = mlen_;
+    return 0;
+}
\ No newline at end of file
--- a/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/avr8_lowrom/encrypt_core.S
+++ b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/avr8_lowrom/encrypt_core.S
--- a/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/avr8_lowrom/photon.h
+++ b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate128v1/avr8_lowrom/photon.h
+;
+; **********************************************
+; * PHOTON-Beetle                              *
+; * Authenticated Encryption and Hash Family   *
+; *                                            *
+; * Assembly implementation for 8-bit AVR CPU  *
+; * Version 1.0 2020 by PHOTON-Beetle Team     *
+; **********************************************
+;
+; Number of RoundFunction calls made by PHOTON_Permutation, and the number of
+; rows handled per round (loop bound in the ShiftRow and MixColumns code).
+#define ROUND_N  12
+#define DIM      8
+
+; Store_OneRow: write x0, x1, x2, x3 through X with post-increment
+; (X advances by 4).
+.MACRO Store_OneRow
+    st X+, x0
+    st X+, x1
+    st X+, x2
+    st X+, x3
+.ENDM
+
+; ROTL_1 i0: rotate register i0 left by one bit within 8 bits.
+; Bit 7 is parked in the T flag, the register is shifted left, and T is
+; written back into bit 0. Clobbers T and the flags set by lsl.
+.MACRO ROTL_1 i0
+    bst  \i0, 7
+    lsl  \i0
+    bld  \i0, 0
+.ENDM
+
+; ROTR_1 i0: rotate register i0 right by one bit within 8 bits.
+; Bit 0 is parked in the T flag, the register is shifted right, and T is
+; written back into bit 7. Clobbers T and the flags set by lsr.
+.MACRO ROTR_1 i0
+    bst  \i0, 0
+    lsr  \i0
+    bld  \i0, 7
+.ENDM
+
+; ROTR_4 i0: rotate register i0 by four bits; exchanging the two nibbles
+; with swap is the same as an 8-bit rotation by 4 in either direction.
+.MACRO ROTR_4 i0
+    swap \i0
+.ENDM
+
+; ROTR_1_ROW: rotate each of x0, x1, x2, x3 right by one bit (ROTR_1 applied
+; to the four registers in that order). Clobbers the T flag.
+ROTR_1_ROW:
+.irp slice, x0, x1, x2, x3
+    ROTR_1 \slice
+.endr
+ret
+
+; ROTL_1_ROW: rotate each of x0, x1, x2, x3 left by one bit (ROTL_1 applied
+; to the four registers in that order). Clobbers the T flag.
+ROTL_1_ROW:
+.irp slice, x0, x1, x2, x3
+    ROTL_1 \slice
+.endr
+ret
+
+; ROTR_4_ROW: rotate each of x0, x1, x2, x3 by four bits (nibble swap via
+; ROTR_4, applied to the four registers in that order).
+ROTR_4_ROW:
+.irp slice, x0, x1, x2, x3
+    ROTR_4 \slice
+.endr
+ret
+
+; RoundFunction: one round applied to the state stored at SRAM_STATE.
+; Control goes to AddRC_Sbox_ShiftRow_Start (round constant, S-box and row
+; rotation, row by row) and from there continues into the MixColumns code,
+; which executes the final ret.
+; On entry Z must hold the address of the next round-constant byte; the
+; constants are fetched with lpm.
+RoundFunction:
+
+    rjmp AddRC_Sbox_ShiftRow_Start
+
+; Jump table indexed by row number, entered with ijmp; the word address of the
+; current entry is kept in Y and advanced by one per row.
+; Row 0 is not rotated. Handlers that finish at ShiftRow_RecoverZ_LPM also
+; fetch the next round-constant byte; those that finish at
+; ShiftRow_RecoverZ_NoLPM do not.
+ShiftRow_routine_table:
+    rjmp ShiftRow_RecoverZ_NoLPM
+    rjmp ShiftRow_1
+    rjmp ShiftRow_2
+    rjmp ShiftRow_3
+    rjmp ShiftRow_4
+    rjmp ShiftRow_5
+    rjmp ShiftRow_6
+    rjmp ShiftRow_7
+
+; Row 1: rotate right by 1.
+ShiftRow_1:
+    rcall ROTR_1_ROW
+    rjmp ShiftRow_RecoverZ_LPM
+
+; Row 2: rotate right by 2.
+ShiftRow_2:
+    rcall ROTR_1_ROW
+    rcall ROTR_1_ROW
+    rjmp ShiftRow_RecoverZ_NoLPM
+
+; Row 3: rotate by 4, then left by 1 (net: right by 3).
+ShiftRow_3:
+    rcall ROTR_4_ROW
+    rcall ROTL_1_ROW
+    rjmp ShiftRow_RecoverZ_LPM
+
+; Row 4: rotate by 4.
+ShiftRow_4:
+    rcall ROTR_4_ROW
+    rjmp ShiftRow_RecoverZ_NoLPM
+
+; Row 5: rotate by 4, then right by 1 (net: right by 5).
+ShiftRow_5:
+    rcall ROTR_4_ROW
+    rcall ROTR_1_ROW
+    rjmp ShiftRow_RecoverZ_LPM
+
+; Row 6: rotate left by 2 (net: right by 6).
+ShiftRow_6:
+    rcall ROTL_1_ROW
+    rcall ROTL_1_ROW
+    rjmp ShiftRow_RecoverZ_NoLPM
+
+; Row 7: rotate left by 1 (net: right by 7). Last row of the round, so no
+; further round-constant byte is fetched here.
+ShiftRow_7:
+    rcall ROTL_1_ROW
+    rjmp ShiftRow_RecoverZ_NoLPM
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Start AddRC_Sbox_ShiftRow
+; Per-row pipeline over DIM rows of the state at SRAM_STATE:
+;   1. load the row into x0..x3,
+;   2. XOR four round-constant bits into bit 0 of x0..x3,
+;   3. apply the bitsliced S-box (t1 and t2 are scratch),
+;   4. dispatch through ShiftRow_routine_table for the row rotation,
+;   5. store the row back in place.
+; Register roles: X = state pointer, Y = word address of the current table
+; entry, Z = round-constant pointer (parked in the register pair starting at
+; cnt0 while Z is used for ijmp), rmp = rows remaining, t0 = current
+; round-constant byte, t3 = constant 1.
+AddRC_Sbox_ShiftRow_Start:
+    clr t3
+    inc t3                              ; t3 = 1, mask for bit 0
+
+    ldi XL, lo8(SRAM_STATE)
+    ldi XH, hi8(SRAM_STATE)
+
+    ldi YL, pm_lo8(ShiftRow_routine_table)
+    ldi YH, pm_hi8(ShiftRow_routine_table)
+    ldi rmp, DIM
+
+    lpm t0, Z+ ; Load two nibbles of round constant for row 0, 1
+AddRC_Sbox_ShiftRow_Loop:
+    ; AddRC_TwoRows
+    ld x0, X+
+    ld x1, X+
+    ld x2, X+
+    ld x3, X+
+    sbiw XL, 4                          ; rewind X so the row is stored back in place
+
+    ; Each ror moves the next bit of t0 into carry; when it is set, bit 0 of
+    ; the matching register is toggled. One row consumes four bits of t0.
+    ror  t0
+    brcc next1
+    eor  x0, t3
+next1:
+    ror  t0
+    brcc next2
+    eor  x1, t3
+next2:
+    ror  t0
+    brcc next3
+    eor  x2, t3
+next3:
+    ror  t0
+    brcc next4
+    eor  x3, t3
+next4:
+    ; Sbox_TwoRows
+    eor  x1, x2
+    mov  t1, x2
+    and  t1, x1
+    eor  x3, t1
+    mov  t1, x3
+    and  x3, x1
+    eor  x3, x2
+    mov  t2, x3
+    eor  x3, x0
+    com  x3
+    mov  x2, x3
+    or   t2, x0
+    eor  x0, t1
+    eor  x1, x0
+    or   x2, x1
+    eor  x2, t1
+    eor  x1, t2
+    eor  x3, x1
+
+    ; Save the round-constant pointer and jump to the handler for this row.
+    movw cnt0, ZL
+    movw ZL, YL
+    ijmp
+
+; Handlers return here: restore Z, and on the _LPM path also fetch the
+; constant byte for the next two rows.
+ShiftRow_RecoverZ_NoLPM:
+    movw ZL, cnt0
+    rjmp ShiftRow_STORE_ROW
+ShiftRow_RecoverZ_LPM:
+    movw ZL, cnt0
+    lpm t0, Z+ ; Load two nibbles of round constant for row 2i, 2i+1
+ShiftRow_STORE_ROW:
+    Store_OneRow
+    adiw YL, 1                          ; next entry of ShiftRow_routine_table
+    dec rmp
+    brne AddRC_Sbox_ShiftRow_Loop
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;  MixColumn Subroutines
+
+    rjmp MC_Start
+
+; Jump table for the eight multiply-accumulate steps that build one
+; MixColumns output row. It is entered with ijmp from MC_MUL_LOOP and Z is
+; advanced by one entry after each step.
+; Coefficient order: 2, 4, 2, 0xb, 2, 8, 5, 6.
+; Each routine receives a state row in t3 (first byte loaded) .. t0 (last byte
+; loaded), transforms it in place and XORs the result into the accumulator
+; x0..x3. Every routine continues at MC_INC_CNT1 except mul6, the last table
+; entry, which continues at MC_STORE_ROW.
+mul_routine_table:
+    rjmp mul2_GF16_0x13_xor
+    rjmp mul4_GF16_0x13_xor
+    rjmp mul2_GF16_0x13_xor
+    rjmp mulb_GF16_0x13_xor
+    rjmp mul2_GF16_0x13_xor
+    rjmp mul8_GF16_0x13_xor
+    rjmp mul5_GF16_0x13_xor
+    rjmp mul6_GF16_0x13_xor
+
+; For all mulN_GF16_0x13_xor routines:
+; Input
+; MSB........LSB
+; x0=@0: x1=@1: x2=@2: x3=@3
+; In the quoted C-style macros x0 is the most significant bit slice; in the
+; code below that slice is held in t0 and the least significant one in t3.
+mul2_GF16_0x13_xor:
+    ; # define mul2_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \
+    ;   x3 = XOR (x3 ,x0); \
+    ; } while (0) ; /* Output : ( MSB ) x1 ,x2 ,x3 , x0 ( LSB ) */
+    eor t3, t0
+    eor x0, t0
+    eor x1, t3
+    eor x2, t2
+    eor x3, t1
+    rjmp MC_INC_CNT1
+
+mul4_GF16_0x13_xor:
+    ; # define mul4_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \
+    ;   x3 = XOR (x3 ,x0); x0 = XOR (x0 ,x1); \
+    ; } while (0) ; /* Output : ( MSB ) x2 ,x3 ,x0 , x1 ( LSB ) */
+    eor t3, t0
+    eor t0, t1
+    eor x0, t1
+    eor x1, t0
+    eor x2, t3
+    eor x3, t2
+    rjmp MC_INC_CNT1
+
+mul5_GF16_0x13_xor:
+    ; # define mul5_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \
+    ;   x2 = XOR (x2 ,x0); x3 = XOR (x3 ,x1); \
+    ;   x1 = XOR (x1 ,x2); x0 = XOR (x0 ,x3); \
+    ; } while (0) ; /* Output : ( MSB ) x2 ,x0 ,x1 , x3 ( LSB ) */
+    eor t2, t0
+    eor t3, t1
+    eor t1, t2
+    eor t0, t3
+    eor x0, t3
+    eor x1, t1
+    eor x2, t0
+    eor x3, t2
+    rjmp MC_INC_CNT1
+
+; Last table entry: leaves the accumulate loop through MC_STORE_ROW.
+mul6_GF16_0x13_xor:
+    ; # define mul6_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \
+    ;   x3 = XOR (x3 ,x1); x1 = XOR (x1 ,x0); \
+    ;   x2 = XOR (x2 ,x1); x0 = XOR (x0 ,x2); \
+    ;   x2 = XOR (x2 ,x3); \
+    ; } while (0) ; /* Output : ( MSB ) x0 ,x2 ,x3 , x1 ( LSB ) */
+    eor t3, t1
+    eor t1, t0
+    eor t2, t1
+    eor t0, t2
+    eor t2, t3
+    eor x0, t1
+    eor x1, t3
+    eor x2, t2
+    eor x3, t0
+    rjmp MC_STORE_ROW
+
+mul8_GF16_0x13_xor:
+    ; # define mul8_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \
+    ;   x3 = XOR (x3 ,x0); x0 = XOR (x0 ,x1); \
+    ;   x1 = XOR (x1 ,x2); \
+    ; } while (0) ; /* Output : ( MSB ) x3 ,x0 ,x1 , x2 ( LSB ) */
+    eor t3, t0
+    eor t0, t1
+    eor t1, t2
+    eor x0, t2
+    eor x1, t1
+    eor x2, t0
+    eor x3, t3
+    rjmp MC_INC_CNT1
+
+mulb_GF16_0x13_xor:
+    ; # define mul11_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \
+    ;   x2 = XOR (x2 ,x0); x1 = XOR (x1 ,x3); \
+    ;   x0 = XOR (x0 ,x1); x3 = XOR (x3 ,x2); \
+    ; } while (0) ; /* Output : ( MSB ) x1 ,x2 ,x0 , x3 ( LSB ) */
+    eor t2, t0
+    eor t1, t3
+    eor t0, t1
+    eor t3, t2
+    eor x0, t3
+    eor x1, t0
+    eor x2, t2
+    eor x3, t1
+    rjmp MC_INC_CNT1
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Start MixColumns
+; Produces DIM new rows in place. For output row number cnt0 the accumulator
+; x0..x3 is cleared, eight state rows are read starting at row cnt0 (wrapping
+; to the start of the state once cnt1 reaches DIM), each row goes through the
+; next mul_routine_table entry and is XORed into the accumulator, and the
+; result is stored over row cnt0.
+; addr4 keeps the incoming Z across this section because Z is needed for ijmp.
+MC_Start:
+    movw addr4, ZL                      ; save Z (restored before ret)
+    ldi XH, hi8(SRAM_STATE)
+    ldi XL, lo8(SRAM_STATE)
+    movw YL, XL                         ; Y = start of state, used for wrap-around
+    clr cnt0
+    clr cnt1
+A1:
+    mov cnt1, cnt0                      ; cnt1 = index of the row read next
+    clr x0
+    clr x1
+    clr x2
+    clr x3
+    ldi ZH, pm_hi8(mul_routine_table)
+    ldi ZL, pm_lo8(mul_routine_table)
+MC_MUL_LOOP:
+    ld  t3, X+
+    ld  t2, X+
+    ld  t1, X+
+    ld  t0, X+
+    ijmp                                ; run the multiply step selected by Z
+MC_INC_CNT1:
+    inc  cnt1
+    cpi  cnt1, DIM
+    brne MC_MUL_NEXT
+    clr  cnt1                           ; past the last row: wrap to row 0
+    movw XL, YL
+MC_MUL_NEXT:
+    adiw ZL, 1                          ; next table entry
+    rjmp MC_MUL_LOOP
+MC_STORE_ROW:
+    ; For cnt0 == 0 no wrap happened, so X is past the rows just read and is
+    ; moved back by STATE_INBYTES (assumes STATE_INBYTES is the size of the
+    ; eight rows read - defined outside this file, TODO confirm).
+    cpi  cnt0, 0
+    brne MC_STORE_DIRECT
+    sbiw XL, STATE_INBYTES
+MC_STORE_DIRECT:
+    Store_OneRow
+
+    inc cnt0
+    cpi cnt0, DIM
+    brne A1
+    movw ZL, addr4                      ; restore the round-constant pointer
+ret
+
+; PHOTON_Permutation: apply RoundFunction ROUND_N times to the state at
+; SRAM_STATE. Z is initialised to the RC table once; each round reads its
+; constant bytes through Z with post-increment and leaves Z pointing at the
+; bytes for the following round. Uses cnt2 as the round counter.
+PHOTON_Permutation:
+    ldi ZH, hi8(RC)
+    ldi ZL, lo8(RC)
+    ldi cnt2, ROUND_N
+round_loop_start:
+    rcall RoundFunction
+    dec cnt2
+    brne round_loop_start
+ret
+
+; Round-constant table, kept in .text so it can be read with lpm.
+; One line per round (12 rounds), four bytes per round; RoundFunction fetches
+; one byte for every two rows and consumes it four bits per row, low bit first.
+.section .text
+RC:
+.byte 0x01,0x62,0xFE,0x9D
+.byte 0x23,0x40,0xDC,0xBF
+.byte 0x67,0x04,0x98,0xFB
+.byte 0xFE,0x9D,0x01,0x62
+.byte 0xCD,0xAE,0x32,0x51
+.byte 0xAB,0xC8,0x54,0x37
+.byte 0x76,0x15,0x89,0xEA
+.byte 0xDC,0xBF,0x23,0x40
+.byte 0x89,0xEA,0x76,0x15
+.byte 0x32,0x51,0xCD,0xAE
+.byte 0x45,0x26,0xBA,0xD9
+.byte 0xBA,0xD9,0x45,0x26
--- a/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/avr8_lowrom/api.h
+++ b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/avr8_lowrom/api.h
+/* Parameter sizes in bytes. Of these, only CRYPTO_ABYTES is referenced by
+ * encrypt.c in this directory, where it is the number of bytes by which the
+ * ciphertext is longer than the message. */
+#define CRYPTO_KEYBYTES     16
+#define CRYPTO_NSECBYTES    0
+#define CRYPTO_NPUBBYTES    16
+#define CRYPTO_ABYTES       16
+/* NOTE(review): presumably declares that input and output buffers must not
+ * overlap - confirm against the API specification this header follows. */
+#define CRYPTO_NOOVERLAP    1
--- a/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/avr8_lowrom/assist.h
+++ b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/avr8_lowrom/assist.h
+;
+; **********************************************
+; * PHOTON-Beetle                              *
+; * Authenticated Encryption and Hash Family   *
+; *                                            *
+; * Assembly implementation for 8-bit AVR CPU  *
+; * Version 1.0 2020 by PHOTON-Beetle Team     *
+; **********************************************
+;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Bitslice
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Reorder_8_bits i0, i1, i2, i3, i4
+; Distributes the eight bits of i0 over i1..i4, two bits each, using
+; rotate-through-carry: each ror on i0 moves its current lowest bit into the
+; carry flag and the following ror on iN rotates that carry into bit 7 of iN.
+; Bits 0..3 of i0 go to i1..i4 in turn, then bits 4..7 do the same, so iN
+; receives bit (N-1) of the low nibble and then of the high nibble of i0.
+; i0 is consumed: it ends up holding the carry on entry followed by the bits
+; rotated out of i1..i4.
+.MACRO Reorder_8_bits i0, i1, i2, i3, i4
+    ror \i0
+    ror \i1
+    ror \i0
+    ror \i2
+    ror \i0
+    ror \i3
+    ror \i0
+    ror \i4 
+    ror \i0
+    ror \i1
+    ror \i0
+    ror \i2
+    ror \i0
+    ror \i3
+    ror \i0
+    ror \i4 
+.ENDM
+
+; InvReorder_8_bits i0, i1, i2, i3, i4
+; Counterpart of Reorder_8_bits: gathers one output byte in i0 by rotating the
+; lowest bit of i1, i2, i3, i4 (in that order, twice over) into the carry flag
+; and from there into bit 7 of i0. After the sixteen rotations the bit taken
+; first sits in bit 0 of i0 and the bit taken last in bit 7.
+; i1..i4 are each rotated right by two positions through carry, so their next
+; two bits are ready for the following invocation.
+.MACRO InvReorder_8_bits i0, i1, i2, i3, i4
+    ror \i1
+    ror \i0
+    ror \i2
+    ror \i0
+    ror \i3
+    ror \i0
+    ror \i4 
+    ror \i0
+    ror \i1
+    ror \i0
+    ror \i2
+    ror \i0
+    ror \i3
+    ror \i0
+    ror \i4 
+    ror \i0
+.ENDM
+
+; Load_Reorder_32_bits
+; Reads four bytes through X (post-increment) and passes each one through
+; Reorder_8_bits, so that x0..x3 each receive eight new bits in total
+; (two per input byte).
+; require XH:XL be the address of the input
+; Uses cnt1 as the loop counter and rmp as the byte being scattered.
+Load_Reorder_32_bits:
+    ldi cnt1, 4
+reorder_8_bits_loop:
+    ld rmp, X+
+    Reorder_8_bits rmp, x0, x1, x2, x3
+    dec cnt1
+    brne reorder_8_bits_loop
+ret
+
+; invReorder_Store_32_bits
+; Rebuilds four bytes from x0..x3 with InvReorder_8_bits and writes them
+; through Y (post-increment). x0..x3 are rotated as a side effect of the macro.
+; require YH:YL be the address of the output
+; Uses cnt1 as the loop counter and rmp as the byte being assembled.
+invReorder_Store_32_bits:
+    ldi cnt1, 4
+invreorder_8_bits_loop:
+    InvReorder_8_bits rmp, x0, x1, x2, x3
+    st Y+, rmp
+    dec cnt1
+    brne invreorder_8_bits_loop
+ret
+
+; Load_Reorder_Store_128_bits
+; Converts 16 input bytes to the bit-reordered form: four times, it calls
+; Load_Reorder_32_bits (4 bytes in through X) and stores the resulting
+; x0, x1, x2, x3 through Y (4 bytes out). Both pointers advance by 16.
+; require XH:XL be the address of the input
+; require YH:YL be the address of the output
+; Uses cnt0 as the outer counter; the callee uses cnt1 and rmp.
+Load_Reorder_Store_128_bits:
+    ldi cnt0, 4
+reorder_32_bits_loop:
+    rcall Load_Reorder_32_bits
+    st Y+, x0
+    st Y+, x1
+    st Y+, x2
+    st Y+, x3
+    dec cnt0
+    brne reorder_32_bits_loop
+ret
+
+; Load_invReorder_Store_128_bits
+; Converts 16 bytes from the bit-reordered form back to plain bytes: four
+; times, it loads x0..x3 through X and calls invReorder_Store_32_bits, which
+; writes 4 bytes through Y. Both pointers advance by 16.
+; require XH:XL be the address of the input
+; require YH:YL be the address of the output
+; Uses cnt0 as the outer counter; the callee uses cnt1 and rmp.
+Load_invReorder_Store_128_bits:
+    ldi cnt0, 4
+invreorder_32_bits_loop:
+    ld x0, X+
+    ld x1, X+
+    ld x2, X+
+    ld x3, X+
+    rcall invReorder_Store_32_bits
+    dec cnt0
+    brne invreorder_32_bits_loop
+ret
+
+; PUSH_ALL: push r2..r17 and r28..r29 onto the stack, in ascending order.
+; POP_ALL is the matching restore and must be used in pairs with this macro.
+.macro PUSH_ALL
+.irp gpr, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15, r16, r17, r28, r29
+push    \gpr
+.endr
+.endm
+
+; POP_ALL: pop r29..r28 and r17..r2, the exact reverse of PUSH_ALL, then
+; clear r1 (NOTE(review): presumably because compiled C code expects r1 to
+; hold zero - confirm against the avr-gcc register conventions).
+.macro POP_ALL
+.irp gpr, r29, r28, r17, r16, r15, r14, r13, r12, r11, r10, r9, r8, r7, r6, r5, r4, r3, r2
+pop    \gpr
+.endr
+clr    r1
+.endm
--- a/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/avr8_lowrom/crypto_aead.h
+++ b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/avr8_lowrom/crypto_aead.h
+/* Public AEAD entry points. The extern "C" guard gives C++ callers C linkage. */
+#ifdef __cplusplus
+extern "C" {
+#endif
+    
+/* Encrypts m (mlen bytes) with associated data ad (adlen bytes), nonce npub
+ * and key k; writes the ciphertext to c and its length to *clen.
+ * nsec is accepted for API compatibility; encrypt.c in this directory
+ * ignores it. */
+int crypto_aead_encrypt(
+	unsigned char *c,unsigned long long *clen,
+	const unsigned char *m,unsigned long long mlen,
+	const unsigned char *ad,unsigned long long adlen,
+	const unsigned char *nsec,
+	const unsigned char *npub,
+	const unsigned char *k
+	); 
+
+
+/* Verifies and decrypts c (clen bytes); writes the plaintext to m and its
+ * length to *outputmlen. encrypt.c in this directory returns 0 on success and
+ * -1 when clen is too short or verification fails; nsec is ignored there. */
+int crypto_aead_decrypt(
+	unsigned char *m,unsigned long long *outputmlen,
+	unsigned char *nsec,
+	const unsigned char *c,unsigned long long clen,
+	const unsigned char *ad,unsigned long long adlen,
+	const unsigned char *npub,
+	const unsigned char *k
+	); 
+
+#ifdef __cplusplus
+}
+#endif
--- a/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/avr8_lowrom/encrypt.c
+++ b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/avr8_lowrom/encrypt.c
+#include <avr/io.h>
+#include <avr/sfr_defs.h>
+#include <stdlib.h>
+#include <string.h>
+#include "api.h"
+
+/* Assembly entry points (implemented outside this file).
+ * Note that every length parameter is an unsigned char, so a single call can
+ * describe at most 255 bytes of message / associated data / hash input. */
+
+/* Encrypts m (mlen bytes) with ad (adlen bytes), npub and k into c. */
+extern void crypto_aead_encrypt_asm(
+      unsigned char *c,         
+      const unsigned char *m,   
+      unsigned char mlen,  
+      const unsigned char *ad,  
+      unsigned char adlen, 
+      const unsigned char *npub,
+      const unsigned char *k    
+      );
+
+/* Decrypts c into m. The caller in this file passes the ciphertext length
+ * without the tag as clen and treats a nonzero return value as an
+ * authentication failure. */
+extern char crypto_aead_decrypt_asm(
+     unsigned char *m,          
+     const unsigned char *c,    
+     unsigned char clen,   
+     const unsigned char *ad,   
+     unsigned char adlen,  
+     const unsigned char *npub, 
+     const unsigned char *k     
+     );
+
+/* Hash entry point; declared here but not called from this file. */
+extern void crypto_hash_asm(
+    unsigned char *out,
+    const unsigned char *in,
+    unsigned char inlen
+    );
+
+
+/*
+ * AEAD encryption wrapper around crypto_aead_encrypt_asm.
+ *
+ * c      output buffer; receives mlen + CRYPTO_ABYTES bytes
+ * clen   receives the ciphertext length (mlen + CRYPTO_ABYTES)
+ * m      plaintext, mlen bytes
+ * ad     associated data, adlen bytes
+ * nsec   unused
+ * npub   nonce
+ * k      key
+ *
+ * Returns 0 on success. Returns -1, without touching c or *clen, when mlen
+ * or adlen does not fit the 8-bit length parameters of the assembly core;
+ * previously such lengths were silently truncated, producing a ciphertext
+ * for only part of the input while *clen reported the full length.
+ */
+int crypto_aead_encrypt(
+    unsigned char *c, unsigned long long *clen,
+    const unsigned char *m, unsigned long long mlen,
+    const unsigned char *ad, unsigned long long adlen,
+    const unsigned char *nsec,
+    const unsigned char *npub,
+    const unsigned char *k
+    )
+{
+    (void)nsec;
+
+    /* A length survives the round trip through unsigned char only if it
+       fits the assembly interface. */
+    if (mlen != (unsigned char)mlen || adlen != (unsigned char)adlen) {
+        return -1;
+    }
+
+    crypto_aead_encrypt_asm(c, m, (unsigned char)mlen,
+                            ad, (unsigned char)adlen, npub, k);
+
+    *clen = mlen + CRYPTO_ABYTES;
+    return 0;
+}
+
+
+
+/*
+ * AEAD decryption wrapper around crypto_aead_decrypt_asm.
+ *
+ * m      output buffer; receives clen - CRYPTO_ABYTES bytes
+ * mlen   receives the plaintext length on success
+ * nsec   unused
+ * c      ciphertext followed by the tag, clen bytes in total
+ * ad     associated data, adlen bytes
+ * npub   nonce
+ * k      key
+ *
+ * Returns 0 on success and -1 when
+ *   - clen is shorter than the tag,
+ *   - the message or associated-data length does not fit the 8-bit length
+ *     parameters of the assembly core (previously silently truncated), or
+ *   - the assembly core reports an authentication failure, in which case
+ *     the plaintext buffer is zeroed before returning.
+ * *mlen is written only on success.
+ */
+int crypto_aead_decrypt(
+    unsigned char *m, unsigned long long *mlen,
+    unsigned char *nsec,
+    const unsigned char *c, unsigned long long clen,
+    const unsigned char *ad, unsigned long long adlen,
+    const unsigned char *npub,
+    const unsigned char *k
+    )
+{
+    unsigned long long mlen_;
+    char tag_mismatch;   /* nonzero means verification failed */
+
+    (void)nsec;
+    if (clen < CRYPTO_ABYTES) {
+        return -1;
+    }
+    mlen_ = clen - CRYPTO_ABYTES;
+
+    /* Reject lengths the 8-bit assembly interface cannot represent. */
+    if (mlen_ != (unsigned char)mlen_ || adlen != (unsigned char)adlen) {
+        return -1;
+    }
+
+    tag_mismatch = crypto_aead_decrypt_asm(m, c, (unsigned char)mlen_,
+                                           ad, (unsigned char)adlen, npub, k);
+
+    if (tag_mismatch != 0)
+    {
+        /* Do not hand unauthenticated plaintext back to the caller. */
+        memset(m, 0, (size_t)mlen_);
+        return -1;
+    }
+    *mlen = mlen_;
+    return 0;
+}
\ No newline at end of file
--- a/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/avr8_lowrom/encrypt_core.S
+++ b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/avr8_lowrom/encrypt_core.S
--- a/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/avr8_lowrom/photon.h
+++ b/photon-beetle/Implementations/crypto_aead/photonbeetleaead128rate32v1/avr8_lowrom/photon.h
+;
+; **********************************************
+; * PHOTON-Beetle                              *
+; * Authenticated Encryption and Hash Family   *
+; *                                            *
+; * Assembly implementation for 8-bit AVR CPU  *
+; * Version 1.0 2020 by PHOTON-Beetle Team     *
+; **********************************************
+;
+; Number of RoundFunction calls made by PHOTON_Permutation, and the number of
+; rows handled per round (loop bound in the ShiftRow and MixColumns code).
+#define ROUND_N  12
+#define DIM      8
+
+; Store_OneRow: write x0, x1, x2, x3 through X with post-increment
+; (X advances by 4).
+.MACRO Store_OneRow
+    st X+, x0
+    st X+, x1
+    st X+, x2
+    st X+, x3
+.ENDM
+
+; ROTL_1 i0: rotate register i0 left by one bit within 8 bits.
+; Bit 7 is parked in the T flag, the register is shifted left, and T is
+; written back into bit 0. Clobbers T and the flags set by lsl.
+.MACRO ROTL_1 i0
+    bst  \i0, 7
+    lsl  \i0
+    bld  \i0, 0
+.ENDM
+
+; ROTR_1 i0: rotate register i0 right by one bit within 8 bits.
+; Bit 0 is parked in the T flag, the register is shifted right, and T is
+; written back into bit 7. Clobbers T and the flags set by lsr.
+.MACRO ROTR_1 i0
+    bst  \i0, 0
+    lsr  \i0
+    bld  \i0, 7
+.ENDM
+
+; ROTR_4 i0: rotate register i0 by four bits; exchanging the two nibbles
+; with swap is the same as an 8-bit rotation by 4 in either direction.
+.MACRO ROTR_4 i0
+    swap \i0
+.ENDM
+
+; ROTR_1_ROW: rotate each of x0, x1, x2, x3 right by one bit (ROTR_1 applied
+; to the four registers in that order). Clobbers the T flag.
+ROTR_1_ROW:
+.irp slice, x0, x1, x2, x3
+    ROTR_1 \slice
+.endr
+ret
+
+; ROTL_1_ROW: rotate each of x0, x1, x2, x3 left by one bit (ROTL_1 applied
+; to the four registers in that order). Clobbers the T flag.
+ROTL_1_ROW:
+.irp slice, x0, x1, x2, x3
+    ROTL_1 \slice
+.endr
+ret
+
+; ROTR_4_ROW: rotate each of x0, x1, x2, x3 by four bits (nibble swap via
+; ROTR_4, applied to the four registers in that order).
+ROTR_4_ROW:
+.irp slice, x0, x1, x2, x3
+    ROTR_4 \slice
+.endr
+ret
+
+; RoundFunction: one round applied to the state stored at SRAM_STATE.
+; Control goes to AddRC_Sbox_ShiftRow_Start (round constant, S-box and row
+; rotation, row by row) and from there continues into the MixColumns code,
+; which executes the final ret.
+; On entry Z must hold the address of the next round-constant byte; the
+; constants are fetched with lpm.
+RoundFunction:
+
+    rjmp AddRC_Sbox_ShiftRow_Start
+
+; Jump table indexed by row number, entered with ijmp; the word address of the
+; current entry is kept in Y and advanced by one per row.
+; Row 0 is not rotated. Handlers that finish at ShiftRow_RecoverZ_LPM also
+; fetch the next round-constant byte; those that finish at
+; ShiftRow_RecoverZ_NoLPM do not.
+ShiftRow_routine_table:
+    rjmp ShiftRow_RecoverZ_NoLPM
+    rjmp ShiftRow_1
+    rjmp ShiftRow_2
+    rjmp ShiftRow_3
+    rjmp ShiftRow_4
+    rjmp ShiftRow_5
+    rjmp ShiftRow_6
+    rjmp ShiftRow_7
+
+; Row 1: rotate right by 1.
+ShiftRow_1:
+    rcall ROTR_1_ROW
+    rjmp ShiftRow_RecoverZ_LPM
+
+; Row 2: rotate right by 2.
+ShiftRow_2:
+    rcall ROTR_1_ROW
+    rcall ROTR_1_ROW
+    rjmp ShiftRow_RecoverZ_NoLPM
+
+; Row 3: rotate by 4, then left by 1 (net: right by 3).
+ShiftRow_3:
+    rcall ROTR_4_ROW
+    rcall ROTL_1_ROW
+    rjmp ShiftRow_RecoverZ_LPM
+
+; Row 4: rotate by 4.
+ShiftRow_4:
+    rcall ROTR_4_ROW
+    rjmp ShiftRow_RecoverZ_NoLPM
+
+; Row 5: rotate by 4, then right by 1 (net: right by 5).
+ShiftRow_5:
+    rcall ROTR_4_ROW
+    rcall ROTR_1_ROW
+    rjmp ShiftRow_RecoverZ_LPM
+
+; Row 6: rotate left by 2 (net: right by 6).
+ShiftRow_6:
+    rcall ROTL_1_ROW
+    rcall ROTL_1_ROW
+    rjmp ShiftRow_RecoverZ_NoLPM
+
+; Row 7: rotate left by 1 (net: right by 7). Last row of the round, so no
+; further round-constant byte is fetched here.
+ShiftRow_7:
+    rcall ROTL_1_ROW
+    rjmp ShiftRow_RecoverZ_NoLPM
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Start AddRC_Sbox_ShiftRow
+; Per-row pipeline over DIM rows of the state at SRAM_STATE:
+;   1. load the row into x0..x3,
+;   2. XOR four round-constant bits into bit 0 of x0..x3,
+;   3. apply the bitsliced S-box (t1 and t2 are scratch),
+;   4. dispatch through ShiftRow_routine_table for the row rotation,
+;   5. store the row back in place.
+; Register roles: X = state pointer, Y = word address of the current table
+; entry, Z = round-constant pointer (parked in the register pair starting at
+; cnt0 while Z is used for ijmp), rmp = rows remaining, t0 = current
+; round-constant byte, t3 = constant 1.
+AddRC_Sbox_ShiftRow_Start:
+    clr t3
+    inc t3                              ; t3 = 1, mask for bit 0
+
+    ldi XL, lo8(SRAM_STATE)
+    ldi XH, hi8(SRAM_STATE)
+
+    ldi YL, pm_lo8(ShiftRow_routine_table)
+    ldi YH, pm_hi8(ShiftRow_routine_table)
+    ldi rmp, DIM
+
+    lpm t0, Z+ ; Load two nibbles of round constant for row 0, 1
+AddRC_Sbox_ShiftRow_Loop:
+    ; AddRC_TwoRows
+    ld x0, X+
+    ld x1, X+
+    ld x2, X+
+    ld x3, X+
+    sbiw XL, 4                          ; rewind X so the row is stored back in place
+
+    ; Each ror moves the next bit of t0 into carry; when it is set, bit 0 of
+    ; the matching register is toggled. One row consumes four bits of t0.
+    ror  t0
+    brcc next1
+    eor  x0, t3
+next1:
+    ror  t0
+    brcc next2
+    eor  x1, t3
+next2:
+    ror  t0
+    brcc next3
+    eor  x2, t3
+next3:
+    ror  t0
+    brcc next4
+    eor  x3, t3
+next4:
+    ; Sbox_TwoRows
+    eor  x1, x2
+    mov  t1, x2
+    and  t1, x1
+    eor  x3, t1
+    mov  t1, x3
+    and  x3, x1
+    eor  x3, x2
+    mov  t2, x3
+    eor  x3, x0
+    com  x3
+    mov  x2, x3
+    or   t2, x0
+    eor  x0, t1
+    eor  x1, x0
+    or   x2, x1
+    eor  x2, t1
+    eor  x1, t2
+    eor  x3, x1
+
+    ; Save the round-constant pointer and jump to the handler for this row.
+    movw cnt0, ZL
+    movw ZL, YL
+    ijmp
+
+; Handlers return here: restore Z, and on the _LPM path also fetch the
+; constant byte for the next two rows.
+ShiftRow_RecoverZ_NoLPM:
+    movw ZL, cnt0
+    rjmp ShiftRow_STORE_ROW
+ShiftRow_RecoverZ_LPM:
+    movw ZL, cnt0
+    lpm t0, Z+ ; Load two nibbles of round constant for row 2i, 2i+1
+ShiftRow_STORE_ROW:
+    Store_OneRow
+    adiw YL, 1                          ; next entry of ShiftRow_routine_table
+    dec rmp
+    brne AddRC_Sbox_ShiftRow_Loop
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;  MixColumn Subroutines
+
+    rjmp MC_Start
+
+; Jump table for the eight multiply-accumulate steps that build one
+; MixColumns output row. It is entered with ijmp from MC_MUL_LOOP and Z is
+; advanced by one entry after each step.
+; Coefficient order: 2, 4, 2, 0xb, 2, 8, 5, 6.
+; Each routine receives a state row in t3 (first byte loaded) .. t0 (last byte
+; loaded), transforms it in place and XORs the result into the accumulator
+; x0..x3. Every routine continues at MC_INC_CNT1 except mul6, the last table
+; entry, which continues at MC_STORE_ROW.
+mul_routine_table:
+    rjmp mul2_GF16_0x13_xor
+    rjmp mul4_GF16_0x13_xor
+    rjmp mul2_GF16_0x13_xor
+    rjmp mulb_GF16_0x13_xor
+    rjmp mul2_GF16_0x13_xor
+    rjmp mul8_GF16_0x13_xor
+    rjmp mul5_GF16_0x13_xor
+    rjmp mul6_GF16_0x13_xor
+
+; For all mulN_GF16_0x13_xor routines:
+; Input
+; MSB........LSB
+; x0=@0: x1=@1: x2=@2: x3=@3
+; In the quoted C-style macros x0 is the most significant bit slice; in the
+; code below that slice is held in t0 and the least significant one in t3.
+mul2_GF16_0x13_xor:
+    ; # define mul2_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \
+    ;   x3 = XOR (x3 ,x0); \
+    ; } while (0) ; /* Output : ( MSB ) x1 ,x2 ,x3 , x0 ( LSB ) */
+    eor t3, t0
+    eor x0, t0
+    eor x1, t3
+    eor x2, t2
+    eor x3, t1
+    rjmp MC_INC_CNT1
+
+mul4_GF16_0x13_xor:
+    ; # define mul4_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \
+    ;   x3 = XOR (x3 ,x0); x0 = XOR (x0 ,x1); \
+    ; } while (0) ; /* Output : ( MSB ) x2 ,x3 ,x0 , x1 ( LSB ) */
+    eor t3, t0
+    eor t0, t1
+    eor x0, t1
+    eor x1, t0
+    eor x2, t3
+    eor x3, t2
+    rjmp MC_INC_CNT1
+
+mul5_GF16_0x13_xor:
+    ; # define mul5_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \
+    ;   x2 = XOR (x2 ,x0); x3 = XOR (x3 ,x1); \
+    ;   x1 = XOR (x1 ,x2); x0 = XOR (x0 ,x3); \
+    ; } while (0) ; /* Output : ( MSB ) x2 ,x0 ,x1 , x3 ( LSB ) */
+    eor t2, t0
+    eor t3, t1
+    eor t1, t2
+    eor t0, t3
+    eor x0, t3
+    eor x1, t1
+    eor x2, t0
+    eor x3, t2
+    rjmp MC_INC_CNT1
+
+; Last table entry: leaves the accumulate loop through MC_STORE_ROW.
+mul6_GF16_0x13_xor:
+    ; # define mul6_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \
+    ;   x3 = XOR (x3 ,x1); x1 = XOR (x1 ,x0); \
+    ;   x2 = XOR (x2 ,x1); x0 = XOR (x0 ,x2); \
+    ;   x2 = XOR (x2 ,x3); \
+    ; } while (0) ; /* Output : ( MSB ) x0 ,x2 ,x3 , x1 ( LSB ) */
+    eor t3, t1
+    eor t1, t0
+    eor t2, t1
+    eor t0, t2
+    eor t2, t3
+    eor x0, t1
+    eor x1, t3
+    eor x2, t2
+    eor x3, t0
+    rjmp MC_STORE_ROW
+
+mul8_GF16_0x13_xor:
+    ; # define mul8_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \
+    ;   x3 = XOR (x3 ,x0); x0 = XOR (x0 ,x1); \
+    ;   x1 = XOR (x1 ,x2); \
+    ; } while (0) ; /* Output : ( MSB ) x3 ,x0 ,x1 , x2 ( LSB ) */
+    eor t3, t0
+    eor t0, t1
+    eor t1, t2
+    eor x0, t2
+    eor x1, t1
+    eor x2, t0
+    eor x3, t3
+    rjmp MC_INC_CNT1
+
+mulb_GF16_0x13_xor:
+    ; # define mul11_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \
+    ;   x2 = XOR (x2 ,x0); x1 = XOR (x1 ,x3); \
+    ;   x0 = XOR (x0 ,x1); x3 = XOR (x3 ,x2); \
+    ; } while (0) ; /* Output : ( MSB ) x1 ,x2 ,x0 , x3 ( LSB ) */
+    eor t2, t0
+    eor t1, t3
+    eor t0, t1
+    eor t3, t2
+    eor x0, t3
+    eor x1, t0
+    eor x2, t2
+    eor x3, t1
+    rjmp MC_INC_CNT1
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Start MixColumns
+; Produces DIM new rows in place. For output row number cnt0 the accumulator
+; x0..x3 is cleared, eight state rows are read starting at row cnt0 (wrapping
+; to the start of the state once cnt1 reaches DIM), each row goes through the
+; next mul_routine_table entry and is XORed into the accumulator, and the
+; result is stored over row cnt0.
+; addr4 keeps the incoming Z across this section because Z is needed for ijmp.
+MC_Start:
+    movw addr4, ZL                      ; save Z (restored before ret)
+    ldi XH, hi8(SRAM_STATE)
+    ldi XL, lo8(SRAM_STATE)
+    movw YL, XL                         ; Y = start of state, used for wrap-around
+    clr cnt0
+    clr cnt1
+A1:
+    mov cnt1, cnt0                      ; cnt1 = index of the row read next
+    clr x0
+    clr x1
+    clr x2
+    clr x3
+    ldi ZH, pm_hi8(mul_routine_table)
+    ldi ZL, pm_lo8(mul_routine_table)
+MC_MUL_LOOP:
+    ld  t3, X+
+    ld  t2, X+
+    ld  t1, X+
+    ld  t0, X+
+    ijmp                                ; run the multiply step selected by Z
+MC_INC_CNT1:
+    inc  cnt1
+    cpi  cnt1, DIM
+    brne MC_MUL_NEXT
+    clr  cnt1                           ; past the last row: wrap to row 0
+    movw XL, YL
+MC_MUL_NEXT:
+    adiw ZL, 1                          ; next table entry
+    rjmp MC_MUL_LOOP
+MC_STORE_ROW:
+    ; For cnt0 == 0 no wrap happened, so X is past the rows just read and is
+    ; moved back by STATE_INBYTES (assumes STATE_INBYTES is the size of the
+    ; eight rows read - defined outside this file, TODO confirm).
+    cpi  cnt0, 0
+    brne MC_STORE_DIRECT
+    sbiw XL, STATE_INBYTES
+MC_STORE_DIRECT:
+    Store_OneRow
+
+    inc cnt0
+    cpi cnt0, DIM
+    brne A1
+    movw ZL, addr4                      ; restore the round-constant pointer
+ret
+
+; PHOTON_Permutation: apply RoundFunction ROUND_N times to the state at
+; SRAM_STATE. Z is initialised to the RC table once; each round reads its
+; constant bytes through Z with post-increment and leaves Z pointing at the
+; bytes for the following round. Uses cnt2 as the round counter.
+PHOTON_Permutation:
+    ldi ZH, hi8(RC)
+    ldi ZL, lo8(RC)
+    ldi cnt2, ROUND_N
+round_loop_start:
+    rcall RoundFunction
+    dec cnt2
+    brne round_loop_start
+ret
+
+; Round-constant table, kept in .text so it can be read with lpm.
+; One line per round (12 rounds), four bytes per round; RoundFunction fetches
+; one byte for every two rows and consumes it four bits per row, low bit first.
+.section .text
+RC:
+.byte 0x01,0x62,0xFE,0x9D
+.byte 0x23,0x40,0xDC,0xBF
+.byte 0x67,0x04,0x98,0xFB
+.byte 0xFE,0x9D,0x01,0x62
+.byte 0xCD,0xAE,0x32,0x51
+.byte 0xAB,0xC8,0x54,0x37
+.byte 0x76,0x15,0x89,0xEA
+.byte 0xDC,0xBF,0x23,0x40
+.byte 0x89,0xEA,0x76,0x15
+.byte 0x32,0x51,0xCD,0xAE
+.byte 0x45,0x26,0xBA,0xD9
+.byte 0xBA,0xD9,0x45,0x26
--- a/photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/avr8_lowrom/api.h
+++ b/photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/avr8_lowrom/api.h
+/* Hash output length in bytes, per the out[0..CRYPTO_BYTES-1] convention
+ * described in hash.c in this directory. */
+#define CRYPTO_BYTES 32
\ No newline at end of file
--- a/photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/avr8_lowrom/assist.h
+++ b/photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/avr8_lowrom/assist.h
+;
+; **********************************************
+; * PHOTON-Beetle                              *
+; * Authenticated Encryption and Hash Family   *
+; *                                            *
+; * Assembly implementation for 8-bit AVR CPU  *
+; * Version 1.0 2020 by PHOTON-Beetle Team     *
+; **********************************************
+;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Bitslice
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Reorder_8_bits i0, i1, i2, i3, i4
+; Distributes the eight bits of i0 over i1..i4, two bits each, using
+; rotate-through-carry: each ror on i0 moves its current lowest bit into the
+; carry flag and the following ror on iN rotates that carry into bit 7 of iN.
+; Bits 0..3 of i0 go to i1..i4 in turn, then bits 4..7 do the same, so iN
+; receives bit (N-1) of the low nibble and then of the high nibble of i0.
+; i0 is consumed: it ends up holding the carry on entry followed by the bits
+; rotated out of i1..i4.
+.MACRO Reorder_8_bits i0, i1, i2, i3, i4
+    ror \i0
+    ror \i1
+    ror \i0
+    ror \i2
+    ror \i0
+    ror \i3
+    ror \i0
+    ror \i4 
+    ror \i0
+    ror \i1
+    ror \i0
+    ror \i2
+    ror \i0
+    ror \i3
+    ror \i0
+    ror \i4 
+.ENDM
+
+; InvReorder_8_bits i0, i1, i2, i3, i4
+; Counterpart of Reorder_8_bits: gathers one output byte in i0 by rotating the
+; lowest bit of i1, i2, i3, i4 (in that order, twice over) into the carry flag
+; and from there into bit 7 of i0. After the sixteen rotations the bit taken
+; first sits in bit 0 of i0 and the bit taken last in bit 7.
+; i1..i4 are each rotated right by two positions through carry, so their next
+; two bits are ready for the following invocation.
+.MACRO InvReorder_8_bits i0, i1, i2, i3, i4
+    ror \i1
+    ror \i0
+    ror \i2
+    ror \i0
+    ror \i3
+    ror \i0
+    ror \i4 
+    ror \i0
+    ror \i1
+    ror \i0
+    ror \i2
+    ror \i0
+    ror \i3
+    ror \i0
+    ror \i4 
+    ror \i0
+.ENDM
+
+; Load_Reorder_32_bits
+; Reads four bytes through X (post-increment) and passes each one through
+; Reorder_8_bits, so that x0..x3 each receive eight new bits in total
+; (two per input byte).
+; require XH:XL be the address of the input
+; Uses cnt1 as the loop counter and rmp as the byte being scattered.
+Load_Reorder_32_bits:
+    ldi cnt1, 4
+reorder_8_bits_loop:
+    ld rmp, X+
+    Reorder_8_bits rmp, x0, x1, x2, x3
+    dec cnt1
+    brne reorder_8_bits_loop
+ret
+
+; invReorder_Store_32_bits
+; Rebuilds four bytes from x0..x3 with InvReorder_8_bits and writes them
+; through Y (post-increment). x0..x3 are rotated as a side effect of the macro.
+; require YH:YL be the address of the output
+; Uses cnt1 as the loop counter and rmp as the byte being assembled.
+invReorder_Store_32_bits:
+    ldi cnt1, 4
+invreorder_8_bits_loop:
+    InvReorder_8_bits rmp, x0, x1, x2, x3
+    st Y+, rmp
+    dec cnt1
+    brne invreorder_8_bits_loop
+ret
+
+; Load_Reorder_Store_128_bits
+; Converts 16 input bytes to the bit-reordered form: four times, it calls
+; Load_Reorder_32_bits (4 bytes in through X) and stores the resulting
+; x0, x1, x2, x3 through Y (4 bytes out). Both pointers advance by 16.
+; require XH:XL be the address of the input
+; require YH:YL be the address of the output
+; Uses cnt0 as the outer counter; the callee uses cnt1 and rmp.
+Load_Reorder_Store_128_bits:
+    ldi cnt0, 4
+reorder_32_bits_loop:
+    rcall Load_Reorder_32_bits
+    st Y+, x0
+    st Y+, x1
+    st Y+, x2
+    st Y+, x3
+    dec cnt0
+    brne reorder_32_bits_loop
+ret
+
+; Load_invReorder_Store_128_bits
+; Converts 16 bytes from the bit-reordered form back to plain bytes: four
+; times, it loads x0..x3 through X and calls invReorder_Store_32_bits, which
+; writes 4 bytes through Y. Both pointers advance by 16.
+; require XH:XL be the address of the input
+; require YH:YL be the address of the output
+; Uses cnt0 as the outer counter; the callee uses cnt1 and rmp.
+Load_invReorder_Store_128_bits:
+    ldi cnt0, 4
+invreorder_32_bits_loop:
+    ld x0, X+
+    ld x1, X+
+    ld x2, X+
+    ld x3, X+
+    rcall invReorder_Store_32_bits
+    dec cnt0
+    brne invreorder_32_bits_loop
+ret
+
+; PUSH_ALL: push r2..r17 and r28..r29 onto the stack, in ascending order.
+; POP_ALL is the matching restore and must be used in pairs with this macro.
+.macro PUSH_ALL
+.irp gpr, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15, r16, r17, r28, r29
+push    \gpr
+.endr
+.endm
+
+; POP_ALL: pop r29..r28 and r17..r2, the exact reverse of PUSH_ALL, then
+; clear r1 (NOTE(review): presumably because compiled C code expects r1 to
+; hold zero - confirm against the avr-gcc register conventions).
+.macro POP_ALL
+.irp gpr, r29, r28, r17, r16, r15, r14, r13, r12, r11, r10, r9, r8, r7, r6, r5, r4, r3, r2
+pop    \gpr
+.endr
+clr    r1
+.endm
--- a/photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/avr8_lowrom/crypto_hash.h
+++ b/photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/avr8_lowrom/crypto_hash.h
+/* Public hash entry point. The extern "C" guard gives C++ callers C linkage. */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Hashes in (inlen bytes) into out. hash.c in this directory forwards the
+ * arguments to the assembly routine crypto_hash_asm and returns 0. */
+int crypto_hash(
+	unsigned char *out,
+	const unsigned char *in,
+	unsigned long long inlen
+	);
+
+#ifdef __cplusplus
+}
+#endif
\ No newline at end of file
--- a/photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/avr8_lowrom/encrypt_core.S
+++ b/photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/avr8_lowrom/encrypt_core.S
--- a/photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/avr8_lowrom/hash.c
+++ b/photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/avr8_lowrom/hash.c
+#include <avr/io.h>
+#include <avr/sfr_defs.h>
+#include <stdlib.h>
+#include <string.h>
+#include "api.h"
+#include "crypto_hash.h"
+
+/*
+ * Hash core implemented outside this file (the _asm suffix suggests the
+ * assembly source of this implementation -- confirm against the build).
+ *
+ * NOTE: inlen is a single unsigned char here, whereas crypto_hash()
+ * receives an unsigned long long, so at most 255 input bytes can be
+ * described through this interface.
+ */
+extern void crypto_hash_asm(
+    unsigned char *out,
+    const unsigned char *in,
+    unsigned char inlen
+    );
+
+/*
+ * Compute the hash of the message in[0], ..., in[inlen-1] and write it to
+ * out[0], ..., out[CRYPTO_BYTES-1].
+ *
+ * The work is done by crypto_hash_asm(), whose length parameter is an
+ * unsigned char. Passing a longer length would silently truncate it and
+ * hash only (inlen mod 256) bytes, so such inputs are rejected instead.
+ *
+ * Returns 0 on success, -1 if inlen does not fit in an unsigned char.
+ */
+int crypto_hash(
+	unsigned char *out,
+	const unsigned char *in,
+	unsigned long long inlen
+)
+{
+	/* Largest length the assembly core can be told about. */
+	const unsigned long long max_inlen = (unsigned char)-1;
+
+	if (inlen > max_inlen) {
+		return -1;
+	}
+
+	crypto_hash_asm(out, in, (unsigned char)inlen);
+
+	return 0;
+}
\ No newline at end of file
--- a/photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/avr8_lowrom/photon.h
+++ b/photon-beetle/Implementations/crypto_hash/photonbeetlehash256rate32v1/avr8_lowrom/photon.h
+;
+; **********************************************
+; * PHOTON-Beetle                              *
+; * Authenticated Encryption and Hash Family   *
+; *                                            *
+; * Assembly implementation for 8-bit AVR CPU  *
+; * Version 1.0 2020 by PHOTON-Beetle Team     *
+; **********************************************
+;
+; ROUND_N: number of RoundFunction calls made by PHOTON_Permutation.
+; DIM:     number of state rows handled per round (loop bound in the
+;          AddRC/Sbox/ShiftRow loop and in MixColumns).
+#define ROUND_N  12
+#define DIM      8
+
+; Store_OneRow
+; Writes the current row x0, x1, x2, x3 (in that order) through X;
+; X advances by 4.
+.MACRO Store_OneRow
+    st X+, x0
+    st X+, x1
+    st X+, x2
+    st X+, x3
+.ENDM
+
+; ROTL_1 reg
+; Rotates an 8-bit register left by one: bit 7 is parked in the T flag,
+; the register is shifted left, and T is written back into bit 0.
+; Changes T and the flags set by lsl.
+.MACRO ROTL_1 i0
+    bst  \i0, 7
+    lsl  \i0
+    bld  \i0, 0
+.ENDM
+
+; ROTR_1 reg
+; Rotates an 8-bit register right by one: bit 0 is parked in the T flag,
+; the register is shifted right, and T is written back into bit 7.
+; Changes T and the flags set by lsr.
+.MACRO ROTR_1 i0
+    bst  \i0, 0
+    lsr  \i0
+    bld  \i0, 7
+.ENDM
+
+; ROTR_4 reg
+; Rotates an 8-bit register by four positions by swapping its nibbles
+; (for an 8-bit value, rotating right by 4 and left by 4 are the same).
+.MACRO ROTR_4 i0
+    swap \i0
+.ENDM
+
+; ROTR_1_ROW
+; Rotates each of x0..x3 right by one bit (the whole current row).
+; Clobbers: T flag, SREG
+ROTR_1_ROW:
+    ROTR_1 x0
+    ROTR_1 x1
+    ROTR_1 x2
+    ROTR_1 x3
+ret
+
+; ROTL_1_ROW
+; Rotates each of x0..x3 left by one bit (the whole current row).
+; Clobbers: T flag, SREG
+ROTL_1_ROW:
+    ROTL_1 x0
+    ROTL_1 x1
+    ROTL_1 x2
+    ROTL_1 x3
+ret
+
+; ROTR_4_ROW
+; Rotates each of x0..x3 by four bits (nibble swap) for the whole
+; current row.
+ROTR_4_ROW:
+    ROTR_4 x0
+    ROTR_4 x1
+    ROTR_4 x2
+    ROTR_4 x3
+ret
+
+; RoundFunction
+; One round over the state at SRAM_STATE: a per-row pass of
+; AddRC + Sbox + ShiftRow, followed by MixColumns.
+; In:  Z = byte address of this round's constants in program memory
+; Out: Z advanced past the 4 constant bytes read during the round
+; The routine is entered with rcall and returns from the end of the
+; MixColumns part.
+RoundFunction:
+
+    rjmp AddRC_Sbox_ShiftRow_Start
+
+; Dispatch table for the per-row rotation, one single-word rjmp per row.
+; The row loop keeps the word address of the current entry in Y, copies
+; it to Z and jumps with ijmp, then advances Y by one entry per row.
+; Row 0 needs no rotation and goes straight to the common tail.
+ShiftRow_routine_table:
+    rjmp ShiftRow_RecoverZ_NoLPM
+    rjmp ShiftRow_1
+    rjmp ShiftRow_2
+    rjmp ShiftRow_3
+    rjmp ShiftRow_4
+    rjmp ShiftRow_5
+    rjmp ShiftRow_6
+    rjmp ShiftRow_7
+
+; ShiftRow_k handlers: rotate every byte of the current row (x0..x3)
+; right by k bit positions, built from the three row helpers:
+;   1 = R1          2 = R1,R1        3 = R4 then L1     4 = R4
+;   5 = R4 then R1  6 = L1,L1 (= right by 6)  7 = L1 (= right by 7)
+; Handlers for rows 1, 3 and 5 leave through ShiftRow_RecoverZ_LPM, which
+; fetches the next round-constant byte for the following pair of rows.
+; All other rows leave through ShiftRow_RecoverZ_NoLPM.
+ShiftRow_1:
+    rcall ROTR_1_ROW
+    rjmp ShiftRow_RecoverZ_LPM
+
+ShiftRow_2:
+    rcall ROTR_1_ROW
+    rcall ROTR_1_ROW
+    rjmp ShiftRow_RecoverZ_NoLPM
+
+ShiftRow_3:
+    rcall ROTR_4_ROW
+    rcall ROTL_1_ROW
+    rjmp ShiftRow_RecoverZ_LPM
+
+ShiftRow_4:
+    rcall ROTR_4_ROW
+    rjmp ShiftRow_RecoverZ_NoLPM
+
+ShiftRow_5:
+    rcall ROTR_4_ROW
+    rcall ROTR_1_ROW
+    rjmp ShiftRow_RecoverZ_LPM
+
+ShiftRow_6:
+    rcall ROTL_1_ROW
+    rcall ROTL_1_ROW
+    rjmp ShiftRow_RecoverZ_NoLPM
+
+; Row 7 is the last row of the round, so no further constant byte is
+; fetched here. The next round loads its first byte before its loop.
+ShiftRow_7:
+    rcall ROTL_1_ROW
+    rjmp ShiftRow_RecoverZ_NoLPM
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Start AddRC_Sbox_ShiftRow
+; Per-row pass over the state. Register roles inside the loop:
+;   X    = address of the current row in SRAM_STATE (4 bytes per row)
+;   Y    = word address of the current ShiftRow_routine_table entry
+;   Z    = round-constant pointer, parked in cnt0:cnt1 while Z is
+;          borrowed for the ijmp dispatch
+;   rmp  = rows left to process (starts at DIM)
+;   t0   = round-constant bits, consumed 4 bits per row from the low end
+;   t3   = constant 1, the mask used to flip bit 0
+;   t1, t2 = S-box temporaries
+AddRC_Sbox_ShiftRow_Start:
+    clr t3
+    inc t3
+
+    ldi XL, lo8(SRAM_STATE)
+    ldi XH, hi8(SRAM_STATE)
+
+    ldi YL, pm_lo8(ShiftRow_routine_table)
+    ldi YH, pm_hi8(ShiftRow_routine_table)
+    ldi rmp, DIM
+
+    lpm t0, Z+ ; Load two nibbles of round constant for row 0, 1
+AddRC_Sbox_ShiftRow_Loop:
+    ; Load the current row, then rewind X so that the row is written
+    ; back to the same place by Store_OneRow.
+    ld x0, X+
+    ld x1, X+
+    ld x2, X+
+    ld x3, X+
+    sbiw XL, 4
+
+    ; AddRC for this row: the next 4 bits of t0 are shifted out through
+    ; carry, one per register, and each set bit flips bit 0 of x0..x3.
+    ; One byte of t0 therefore serves two consecutive rows.
+    ror  t0
+    brcc next1
+    eor  x0, t3
+next1:
+    ror  t0
+    brcc next2
+    eor  x1, t3
+next2:
+    ror  t0
+    brcc next3
+    eor  x2, t3
+next3:
+    ror  t0
+    brcc next4
+    eor  x3, t3
+next4:
+    ; Sbox for this row: a fixed sequence of bitwise operations on
+    ; x0..x3, so all 8 bit positions are processed in parallel.
+    eor  x1, x2
+    mov  t1, x2
+    and  t1, x1
+    eor  x3, t1
+    mov  t1, x3
+    and  x3, x1
+    eor  x3, x2
+    mov  t2, x3
+    eor  x3, x0
+    com  x3
+    mov  x2, x3
+    or   t2, x0
+    eor  x0, t1
+    eor  x1, x0
+    or   x2, x1
+    eor  x2, t1
+    eor  x1, t2
+    eor  x3, x1
+
+    ; ShiftRow dispatch: save the constant pointer, then jump through the
+    ; table entry for this row. Handlers come back to one of the two
+    ; ShiftRow_RecoverZ labels below.
+    movw cnt0, ZL
+    movw ZL, YL
+    ijmp
+
+ShiftRow_RecoverZ_NoLPM:
+    movw ZL, cnt0
+    rjmp ShiftRow_STORE_ROW
+ShiftRow_RecoverZ_LPM:
+    movw ZL, cnt0
+    lpm t0, Z+ ; Load two nibbles of round constant for row 2i, 2i+1
+ShiftRow_STORE_ROW:
+    ; Write the row back, step Y to the next table entry, next row.
+    Store_OneRow
+    adiw YL, 1
+    dec rmp
+    brne AddRC_Sbox_ShiftRow_Loop
+
+;;;;;;;;;;;;;;;;;;;;;;;;  MixColumn Subroutines
+
+    rjmp MC_Start
+
+; Dispatch table for MixColumns, one single-word rjmp per input row.
+; For every output row the 8 input rows are multiplied, in reading order,
+; by 2, 4, 2, 0xb, 2, 8, 5, 6 and XORed into the accumulator x0..x3.
+; Z holds the word address of the current entry and is stepped by one
+; after each routine. The last entry (mul6) ends the pass by jumping to
+; MC_STORE_ROW instead of MC_INC_CNT1.
+mul_routine_table:
+    rjmp mul2_GF16_0x13_xor
+    rjmp mul4_GF16_0x13_xor
+    rjmp mul2_GF16_0x13_xor
+    rjmp mulb_GF16_0x13_xor
+    rjmp mul2_GF16_0x13_xor
+    rjmp mul8_GF16_0x13_xor
+    rjmp mul5_GF16_0x13_xor
+    rjmp mul6_GF16_0x13_xor
+
+; For all mulN_GF16_0x13_xor routines:
+; Input
+; MSB........LSB
+; x0=@0: x1=@1: x2=@2: x3=@3
+; The quoted reference macros name their operands x0..x3. In the code
+; those operands are the loaded row t0..t3 (t0 = MSB, t3 = LSB), which is
+; destroyed. The product is XORed into the accumulator registers
+; x0 (LSB) .. x3 (MSB) following the output order given in each macro.
+mul2_GF16_0x13_xor:
+    ; # define mul2_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \
+    ;   x3 = XOR (x3 ,x0); \
+    ; } while (0) ; /* Output : ( MSB ) x1 ,x2 ,x3 , x0 ( LSB ) */
+    eor t3, t0
+    eor x0, t0
+    eor x1, t3
+    eor x2, t2
+    eor x3, t1
+    rjmp MC_INC_CNT1
+
+; Multiply the loaded row t0..t3 by 4 and XOR the product into the
+; accumulator x0 (LSB) .. x3 (MSB). t0 and t3 are overwritten.
+mul4_GF16_0x13_xor:
+    ; # define mul4_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \
+    ;   x3 = XOR (x3 ,x0); x0 = XOR (x0 ,x1); \
+    ; } while (0) ; /* Output : ( MSB ) x2 ,x3 ,x0 , x1 ( LSB ) */
+    eor t3, t0
+    eor t0, t1
+    eor x0, t1
+    eor x1, t0
+    eor x2, t3
+    eor x3, t2
+    rjmp MC_INC_CNT1
+
+; Multiply the loaded row t0..t3 by 5 and XOR the product into the
+; accumulator x0 (LSB) .. x3 (MSB). t0..t3 are all overwritten.
+mul5_GF16_0x13_xor:
+    ; # define mul5_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \
+    ;   x2 = XOR (x2 ,x0); x3 = XOR (x3 ,x1); \
+    ;   x1 = XOR (x1 ,x2); x0 = XOR (x0 ,x3); \
+    ; } while (0) ; /* Output : ( MSB ) x2 ,x0 ,x1 , x3 ( LSB ) */
+    eor t2, t0
+    eor t3, t1
+    eor t1, t2
+    eor t0, t3
+    eor x0, t3
+    eor x1, t1
+    eor x2, t0
+    eor x3, t2
+    rjmp MC_INC_CNT1
+
+; Multiply the loaded row t0..t3 by 6 and XOR the product into the
+; accumulator x0 (LSB) .. x3 (MSB). t0..t3 are all overwritten.
+; This is the last entry of mul_routine_table, so it finishes the output
+; row: it jumps to MC_STORE_ROW rather than MC_INC_CNT1.
+mul6_GF16_0x13_xor:
+    ; # define mul6_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \
+    ;   x3 = XOR (x3 ,x1); x1 = XOR (x1 ,x0); \
+    ;   x2 = XOR (x2 ,x1); x0 = XOR (x0 ,x2); \
+    ;   x2 = XOR (x2 ,x3); \
+    ; } while (0) ; /* Output : ( MSB ) x0 ,x2 ,x3 , x1 ( LSB ) */
+    eor t3, t1
+    eor t1, t0
+    eor t2, t1
+    eor t0, t2
+    eor t2, t3
+    eor x0, t1
+    eor x1, t3
+    eor x2, t2
+    eor x3, t0
+    rjmp MC_STORE_ROW
+
+; Multiply the loaded row t0..t3 by 8 and XOR the product into the
+; accumulator x0 (LSB) .. x3 (MSB). t0, t1 and t3 are overwritten.
+mul8_GF16_0x13_xor:
+    ; # define mul8_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \
+    ;   x3 = XOR (x3 ,x0); x0 = XOR (x0 ,x1); \
+    ;   x1 = XOR (x1 ,x2); \
+    ; } while (0) ; /* Output : ( MSB ) x3 ,x0 ,x1 , x2 ( LSB ) */
+    eor t3, t0
+    eor t0, t1
+    eor t1, t2
+    eor x0, t2
+    eor x1, t1
+    eor x2, t0
+    eor x3, t3
+    rjmp MC_INC_CNT1
+
+; Multiply the loaded row t0..t3 by 0xb (11, hence mul11 in the quoted
+; macro) and XOR the product into the accumulator x0 (LSB) .. x3 (MSB).
+; t0..t3 are all overwritten.
+mulb_GF16_0x13_xor:
+    ; # define mul11_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \
+    ;   x2 = XOR (x2 ,x0); x1 = XOR (x1 ,x3); \
+    ;   x0 = XOR (x0 ,x1); x3 = XOR (x3 ,x2); \
+    ; } while (0) ; /* Output : ( MSB ) x1 ,x2 ,x0 , x3 ( LSB ) */
+    eor t2, t0
+    eor t1, t3
+    eor t0, t1
+    eor t3, t2
+    eor x0, t3
+    eor x1, t0
+    eor x2, t2
+    eor x3, t1
+    rjmp MC_INC_CNT1
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Start MixColumns
+; Produces DIM output rows, one per pass of the A1 loop. Pass k reads the
+; 8 rows starting at row k (wrapping from the last row back to row 0),
+; multiplies them by the mul_routine_table factors in reading order,
+; accumulates in x0..x3 and stores the result over row k. The update is
+; in place, so later passes read rows already written by earlier passes.
+; Register roles:
+;   addr4 = saved Z (round-constant pointer), restored before ret
+;   X     = read/write pointer into the state, Y = base of SRAM_STATE
+;   Z     = word address of the current mul_routine_table entry
+;   cnt0  = index of the row being produced
+;   cnt1  = index of the row being read
+;   t3, t2, t1, t0 = row just loaded (t3 is the first byte in memory)
+MC_Start:
+    movw addr4, ZL
+    ldi XH, hi8(SRAM_STATE)
+    ldi XL, lo8(SRAM_STATE)
+    movw YL, XL
+    clr cnt0
+    ; NOTE(review): this clear is redundant, cnt1 is overwritten at A1
+    ; before it is read.
+    clr cnt1
+A1:
+    mov cnt1, cnt0
+    clr x0
+    clr x1
+    clr x2
+    clr x3
+    ldi ZH, pm_hi8(mul_routine_table)
+    ldi ZL, pm_lo8(mul_routine_table)
+MC_MUL_LOOP:
+    ld  t3, X+
+    ld  t2, X+
+    ld  t1, X+
+    ld  t0, X+
+    ijmp
+MC_INC_CNT1:
+    ; Reached from every multiply routine except the last table entry.
+    ; When the row just read was the last one, wrap X back to row 0.
+    inc  cnt1
+    cpi  cnt1, DIM
+    brne MC_MUL_NEXT
+    clr  cnt1
+    movw XL, YL
+MC_MUL_NEXT:
+    adiw ZL, 1
+    rjmp MC_MUL_LOOP
+MC_STORE_ROW:
+    ; In pass 0 the rows are read straight through without wrapping, so X
+    ; is past the last row read and is moved back by STATE_INBYTES
+    ; (defined elsewhere; presumably the size of the whole state, confirm)
+    ; to reach row 0. In every later pass X already points at row cnt0.
+    cpi  cnt0, 0
+    brne MC_STORE_DIRECT
+    sbiw XL, STATE_INBYTES
+MC_STORE_DIRECT:
+    Store_OneRow
+
+    inc cnt0
+    cpi cnt0, DIM
+    brne A1
+    movw ZL, addr4
+ret
+
+; PHOTON_Permutation
+; Runs RoundFunction ROUND_N times on the state at SRAM_STATE.
+; Z is pointed at the RC table once. Each RoundFunction call reads its
+; constants through Z with post-increment and hands Z back advanced, so
+; consecutive rounds walk through the table.
+; Clobbers: cnt2 (round counter), Z, and everything RoundFunction uses
+PHOTON_Permutation:
+    ldi ZH, hi8(RC)
+    ldi ZL, lo8(RC)
+    ldi cnt2, ROUND_N
+round_loop_start:
+    rcall RoundFunction
+    dec cnt2
+    brne round_loop_start
+ret
+
+; Round constants, read from program memory with lpm.
+; RoundFunction consumes 4 bytes per round (one .byte line below): each
+; byte carries two 4-bit values, the low nibble for an even row and the
+; high nibble for the odd row that follows it.
+.section .text
+RC:
+.byte 0x01,0x62,0xFE,0x9D
+.byte 0x23,0x40,0xDC,0xBF
+.byte 0x67,0x04,0x98,0xFB
+.byte 0xFE,0x9D,0x01,0x62
+.byte 0xCD,0xAE,0x32,0x51
+.byte 0xAB,0xC8,0x54,0x37
+.byte 0x76,0x15,0x89,0xEA
+.byte 0xDC,0xBF,0x23,0x40
+.byte 0x89,0xEA,0x76,0x15
+.byte 0x32,0x51,0xCD,0xAE
+.byte 0x45,0x26,0xBA,0xD9
+.byte 0xBA,0xD9,0x45,0x26