;
; **********************************************
; * KNOT: a family of bit-slice lightweight    *
; *       authenticated encryption algorithms  *
; *       and hash functions                   *
; *                                            *
; * Assembly implementation for 8-bit AVR CPU  *
; * Version 1.1 2020 by KNOT Team              *
; **********************************************
;
; Syntax : GNU as for AVR, run through the C preprocessor.
; Layout : the state is four rows stored back to back starting at
;          SRAM_STATE; row r starts at SRAM_STATE + r * ROW_INBYTES.
;          The code below is fully unrolled for 16 byte columns, so it
;          presumably requires ROW_INBYTES == 16 (defined in assist.h;
;          TODO confirm).
;
; Register plan while the round loop runs:
;   rows 0 and 1     : stay in SRAM, streamed through x0j / x1j
;   row 2 even bytes : x20 .. x2e   (r0 .. r7)
;   row 2 odd bytes  : stay in SRAM, carried column to column in x2j
;   row 3            : x30 .. x3f   (r8 .. r23)
;
; The row 2 and row 3 registers are deliberately NOT numbered in byte
; order: they are laid out so that the byte rotations of ShiftRows can be
; done with movw on even/odd register pairs.
;

; ---- row 2, even bytes (name x2N = byte N of row 2) ----
#define x20  r0
#define x22  r2
#define x24  r4
#define x26  r6
#define x28  r1
#define x2a  r3
#define x2c  r5
#define x2e  r7

; ---- row 3, all bytes (name x3N = byte N of row 3) ----
#define x30  r8
#define x3d  r10
#define x3a  r12
#define x37  r14
#define x34  r16
#define x31  r18
#define x3e  r20
#define x3b  r22
#define x38  r9
#define x35  r11
#define x32  r13
#define x3f  r15
#define x3c  r17
#define x39  r19
#define x36  r21
#define x33  r23

; ---- scratch ----
; t1j and x0j intentionally share r25: t1j is only used as the high half
; of the movw pair t1j:t0j during ShiftRows, at which point x0j is dead.
#define t0j  r24
#define t1j  r25
#define x0j  r25
#define x1j  r27
#define x2j  r26

#include "assist.h"

; ----------------------------------------------------------------------
; Sbox i0, i1, i2, i3
;   Bit-sliced S-box applied in place to the four row bytes of one
;   column (i0 = row 0 byte ... i3 = row 3 byte).
;   Clobbers : t0j, and the S/V/N/Z flags.
;   Invariant: the carry flag is NOT modified. The callers keep the
;              row 1 rotate-left-by-one carry live across this macro, so
;              only carry-neutral instructions may be used here. That is
;              why i0 is complemented with ldi/eor instead of com (com
;              sets the carry flag).
; ----------------------------------------------------------------------
.macro Sbox i0, i1, i2, i3
    ldi  t0j, 0xFF
    eor  \i0, t0j           ; i0 = ~i0 without touching carry
    mov  t0j, \i1
    and  \i1, \i0
    eor  \i1, \i2
    or   \i2, t0j
    eor  \i0, \i3
    eor  \i2, \i0
    eor  t0j, \i3
    and  \i0, \i1
    eor  \i3, \i1
    eor  \i0, t0j
    and  t0j, \i2
    eor  \i1, t0j
.endm

; ----------------------------------------------------------------------
; TwoColumns i2_e, i3_e, i3_o
;   Processes an even column 2i and the following odd column 2i+1.
;   i2_e : register holding the row 2 byte of the even column
;   i3_e : register holding the row 3 byte of the even column
;   i3_o : register holding the row 3 byte of the odd column
;   In   : Y  -> row 0 byte of column 2i
;          x2j = row 2 byte of column 2i+1
;          C   = bit carried out of the previous row 1 byte
;   Out  : Y  -> row 0 byte of column 2i+2
;          x2j = row 2 byte of column 2i+3 (as found in SRAM)
;          C   = bit carried out of the row 1 byte of column 2i+1
;   The new row 2 byte of column 2i+1 is written two byte positions
;   further on (to row 2 byte 2i+3), which performs the row 2 byte
;   rotation for the odd bytes.
;   Clobbers: x0j, x1j, t0j.
; ----------------------------------------------------------------------
.macro TwoColumns i2_e, i3_e, i3_o
    ; column 2i
    ld   x0j, Y
    ldd  x1j, Y + ROW_INBYTES
    Sbox x0j, x1j, \i2_e, \i3_e
    st   Y+, x0j
    rol  x1j                          ; ShiftRows -- Row 1 <<< 1
    std  Y + ROW_INBYTES - 1, x1j     ; Y already advanced, hence the -1

    ; column 2i+1
    ld   x0j, Y
    ldd  x1j, Y + ROW_INBYTES
    Sbox x0j, x1j, x2j, \i3_o
    st   Y+, x0j
    rol  x1j                          ; ShiftRows -- Row 1 <<< 1
    std  Y + ROW_INBYTES - 1, x1j
    ; swap x2j with row 2 byte 2i+3: fetch the next odd input byte and
    ; drop the freshly computed byte into its rotated position
    ldd  t0j, Y + 2 * ROW_INBYTES + 1
    std  Y + 2 * ROW_INBYTES + 1, x2j
    mov  x2j, t0j
.endm

; ----------------------------------------------------------------------
; Permutation
;   Runs the round function (AddRC, S-box on every column, ShiftRows)
;   on the state at SRAM_STATE, in place.
;   In   : rn   = round count. The counter is decremented before it is
;                 tested, so rn == 0 results in 256 rounds.
;          AEDH = bit 2 selects the constant table when both CRYPTO_AEAD
;                 and CRYPTO_HASH are defined (0: RC_LFSR7, 1: RC_LFSR8)
;   Out  : state at SRAM_STATE updated
;   Uses : r0..r27, Y, Z, rcnt. PUSH_CONFLICT / POP_CONFLICT come from
;          assist.h and presumably save and restore whichever of these
;          the caller needs preserved -- TODO confirm against assist.h.
;   Note : rcnt is parked on the stack for the duration of each round,
;          presumably because it aliases one of the working registers
;          (its definition is in assist.h).
; ----------------------------------------------------------------------
Permutation:
    PUSH_CONFLICT
    mov  rcnt, rn
    push rcnt

    ; load row 2 even bytes and all of row 3 into registers
    ldi  YH, hi8(SRAM_STATE + 2 * ROW_INBYTES)
    ldi  YL, lo8(SRAM_STATE + 2 * ROW_INBYTES)
    ldd  x20, Y + 0x00
    ldd  x22, Y + 0x02
    ldd  x24, Y + 0x04
    ldd  x26, Y + 0x06
    ldd  x28, Y + 0x08
    ldd  x2a, Y + 0x0a
    ldd  x2c, Y + 0x0c
    ldd  x2e, Y + 0x0e
    adiw YL, ROW_INBYTES
    ld   x30, Y+
    ld   x31, Y+
    ld   x32, Y+
    ld   x33, Y+
    ld   x34, Y+
    ld   x35, Y+
    ld   x36, Y+
    ld   x37, Y+
    ld   x38, Y+
    ld   x39, Y+
    ld   x3a, Y+
    ld   x3b, Y+
    ld   x3c, Y+
    ld   x3d, Y+
    ld   x3e, Y+
    ld   x3f, Y+

    ; point Z at the round constant table (read with lpm below)
#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
    sbrc AEDH, 2                 ; AEDH[2] = 0 for AEAD and AEDH[2] = 1 for HASH
    rjmp For_Hash
For_AEAD:
    ldi  ZL, lo8(RC_LFSR7)
    ldi  ZH, hi8(RC_LFSR7)
    rjmp round_loop_start
For_Hash:
    ldi  ZL, lo8(RC_LFSR8)
    ldi  ZH, hi8(RC_LFSR8)
#elif defined(CRYPTO_AEAD)
    ldi  ZL, lo8(RC_LFSR7)
    ldi  ZH, hi8(RC_LFSR7)
#else
    ldi  ZL, lo8(RC_LFSR8)
    ldi  ZH, hi8(RC_LFSR8)
#endif

round_loop_start:
    ; AddRC: the round constant is XORed into row 0, byte 0 only
    lpm  t0j, Z+
    ldi  YH, hi8(SRAM_STATE)
    ldi  YL, lo8(SRAM_STATE)

    ; From the lsl below until the adc after column 15 the carry flag
    ; holds the bit moving between consecutive row 1 bytes. Nothing in
    ; between may modify the carry flag.

    ; column 0
    ld   x0j, Y
    eor  x0j, t0j
    ldd  x1j, Y + ROW_INBYTES
    Sbox x0j, x1j, x20, x30
    st   Y+, x0j
    lsl  x1j                          ; ShiftRows -- Row 1 <<< 1 (starts the chain)
    std  Y + ROW_INBYTES - 1, x1j

    ; column 1
    ld   x0j, Y
    ldd  x1j, Y + ROW_INBYTES
    ldd  x2j, Y + 2 * ROW_INBYTES     ; first odd row 2 byte
    Sbox x0j, x1j, x2j, x31
    st   Y+, x0j
    rol  x1j                          ; ShiftRows -- Row 1 <<< 1
    std  Y + ROW_INBYTES - 1, x1j
    ldd  t0j, Y + 2 * ROW_INBYTES + 1
    std  Y + 2 * ROW_INBYTES + 1, x2j
    mov  x2j, t0j

    ; column 2, 3
    TwoColumns x22, x32, x33
    ; column 4, 5
    TwoColumns x24, x34, x35
    ; column 6, 7
    TwoColumns x26, x36, x37
    ; column 8, 9
    TwoColumns x28, x38, x39
    ; column 10, 11
    TwoColumns x2a, x3a, x3b
    ; column 12, 13
    TwoColumns x2c, x3c, x3d

    ; column 14
    ld   x0j, Y
    ldd  x1j, Y + ROW_INBYTES
    Sbox x0j, x1j, x2e, x3e
    st   Y+, x0j
    rol  x1j                          ; ShiftRows -- Row 1 <<< 1
    std  Y + ROW_INBYTES - 1, x1j

    ; column 15
    ld   x0j, Y
    ldd  x1j, Y + ROW_INBYTES
    Sbox x0j, x1j, x2j, x3f
    st   Y+, x0j
    rol  x1j                          ; ShiftRows -- Row 1 <<< 1
    std  Y + ROW_INBYTES - 1, x1j

    ; Y now points at row 1 byte 0: close the row 1 rotation by adding
    ; the bit carried out of byte 15 into bit 0 of byte 0 (that bit was
    ; cleared by the lsl in column 0)
    ld   x1j, Y
    eor  t0j, t0j                     ; t0j = 0, carry untouched
    adc  x1j, t0j
    st   Y, x1j
    ; the row 2 byte of column 15 wraps around to row 2 byte 1
    std  Y + ROW_INBYTES + 1, x2j

    ; ShiftRows -- Row 2: rotate by two bytes. The odd bytes were moved
    ; through x2j above; the even bytes are rotated here in registers.
    ;   x2e x2c x2a x28 x26 x24 x22 x20 => x2c x2a x28 x26 x24 x22 x20 x2e
    ; Equivalent byte moves:
    ;mov t0j, x2e
    ;mov x2e, x2c
    ;mov x2c, x2a
    ;mov x2a, x28
    ;mov x28, x26
    ;mov x26, x24
    ;mov x24, x22
    ;mov x22, x20
    ;mov x20, t0j
    ; an intentional arrangement of registers to facilitate movw
    movw t0j, x26                     ; t1j:t0j <= x2e:x26
    movw x26, x24                     ; x2e:x26 <= x2c:x24
    movw x24, x22                     ; x2c:x24 <= x2a:x22
    movw x22, x20                     ; x2a:x22 <= x28:x20
    mov  x20, t1j                     ; x20 <= old x2e
    mov  x28, t0j                     ; x28 <= old x26

    ; ShiftRows -- Row 3, step 1: rotate the whole row left by one bit.
    ; The first rol only serves to move the top bit of x3f into carry.
    mov  t0j, x3f
    rol  t0j
    rol  x30
    rol  x31
    rol  x32
    rol  x33
    rol  x34
    rol  x35
    rol  x36
    rol  x37
    rol  x38
    rol  x39
    rol  x3a
    rol  x3b
    rol  x3c
    rol  x3d
    rol  x3e
    rol  x3f

    ; ShiftRows -- Row 3, step 2: rotate by three bytes (<<< 24), giving
    ; a total rotation of 25 bits together with step 1.
    ;   f e d c b a 9 8 7 6 5 4 3 2 1 0 =>
    ;   c b a 9 8 7 6 5 4 3 2 1 0 f e d
    ; Equivalent byte moves (x3j stands for a temporary):
    ; mov x3j, x30
    ; mov x30, x3d
    ; mov x3d, x3a
    ; mov x3a, x37
    ; mov x37, x34
    ; mov x34, x31
    ; mov x31, x3e
    ; mov x3e, x3b
    ; mov x3b, x38
    ; mov x38, x35
    ; mov x35, x32
    ; mov x32, x3f
    ; mov x3f, x3c
    ; mov x3c, x39
    ; mov x39, x36
    ; mov x36, x33
    ; mov x33, x3j
    ; an intentional arrangement of registers to facilitate movw
    ; x30 r8
    ; x3d r10
    ; x3a r12
    ; x37 r14
    ; x34 r16
    ; x31 r18
    ; x3e r20
    ; x3b r22
    ; x38 r9
    ; x35 r11
    ; x32 r13
    ; x3f r15
    ; x3c r17
    ; x39 r19
    ; x36 r21
    ; x33 r23
    movw t0j, x30                     ; t1j:t0j <= x38:x30
    movw x30, x3d                     ; x38:x30 <= x35:x3d
    movw x3d, x3a                     ; x35:x3d <= x32:x3a
    movw x3a, x37                     ; x32:x3a <= x3f:x37
    movw x37, x34                     ; x3f:x37 <= x3c:x34
    movw x34, x31                     ; x3c:x34 <= x39:x31
    movw x31, x3e                     ; x39:x31 <= x36:x3e
    movw x3e, x3b                     ; x36:x3e <= x33:x3b
    mov  x3b, t1j                     ; x3b <= old x38
    mov  x33, t0j                     ; x33 <= old x30

    ; round counter lives on the stack between rounds; push does not
    ; change the flags, so breq still tests the result of dec
    pop  rcnt
    dec  rcnt
    push rcnt
    breq round_loop_end
    rjmp round_loop_start             ; loop body is too long for a conditional branch

round_loop_end:
    pop  rcnt                         ; balance the last push

    ; write row 2 even bytes and all of row 3 back to SRAM
    ldi  YH, hi8(SRAM_STATE + 2 * ROW_INBYTES)
    ldi  YL, lo8(SRAM_STATE + 2 * ROW_INBYTES)
    std  Y + 0x00, x20
    std  Y + 0x02, x22
    std  Y + 0x04, x24
    std  Y + 0x06, x26
    std  Y + 0x08, x28
    std  Y + 0x0a, x2a
    std  Y + 0x0c, x2c
    std  Y + 0x0e, x2e
    adiw YL, ROW_INBYTES
    st   Y+, x30
    st   Y+, x31
    st   Y+, x32
    st   Y+, x33
    st   Y+, x34
    st   Y+, x35
    st   Y+, x36
    st   Y+, x37
    st   Y+, x38
    st   Y+, x39
    st   Y+, x3a
    st   Y+, x3b
    st   Y+, x3c
    st   Y+, x3d
    st   Y+, x3e
    st   Y+, x3f

    POP_CONFLICT
    ret

; ----------------------------------------------------------------------
; Round constant tables, read from program memory with lpm.
; Each table is emitted exactly once:
;   RC_LFSR7 : whenever CRYPTO_AEAD is defined
;   RC_LFSR8 : whenever CRYPTO_HASH is defined, or CRYPTO_AEAD is not
; This yields the same symbols, contents and order as selecting between
; the three cases (AEAD and HASH / AEAD only / otherwise) separately.
; ----------------------------------------------------------------------
.section .text

#if defined(CRYPTO_AEAD)
RC_LFSR7:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03
.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a
.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c
.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b
.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a
.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f
.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43
.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09
.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36
.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37
.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31
.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25
.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c
.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a
.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f
.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00
#endif

#if defined(CRYPTO_HASH) || !defined(CRYPTO_AEAD)
RC_LFSR8:
.byte 0x01, 0x02, 0x04, 0x08, 0x11, 0x23, 0x47, 0x8e
.byte 0x1c, 0x38, 0x71, 0xe2, 0xc4, 0x89, 0x12, 0x25
.byte 0x4b, 0x97, 0x2e, 0x5c, 0xb8, 0x70, 0xe0, 0xc0
.byte 0x81, 0x03, 0x06, 0x0c, 0x19, 0x32, 0x64, 0xc9
.byte 0x92, 0x24, 0x49, 0x93, 0x26, 0x4d, 0x9b, 0x37
.byte 0x6e, 0xdc, 0xb9, 0x72, 0xe4, 0xc8, 0x90, 0x20
.byte 0x41, 0x82, 0x05, 0x0a, 0x15, 0x2b, 0x56, 0xad
.byte 0x5b, 0xb6, 0x6d, 0xda, 0xb5, 0x6b, 0xd6, 0xac
.byte 0x59, 0xb2, 0x65, 0xcb, 0x96, 0x2c, 0x58, 0xb0
.byte 0x61, 0xc3, 0x87, 0x0f, 0x1f, 0x3e, 0x7d, 0xfb
.byte 0xf6, 0xed, 0xdb, 0xb7, 0x6f, 0xde, 0xbd, 0x7a
.byte 0xf5, 0xeb, 0xd7, 0xae, 0x5d, 0xba, 0x74, 0xe8
.byte 0xd1, 0xa2, 0x44, 0x88, 0x10, 0x21, 0x43, 0x86
.byte 0x0d, 0x1b, 0x36, 0x6c, 0xd8, 0xb1, 0x63, 0xc7
.byte 0x8f, 0x1e, 0x3c, 0x79, 0xf3, 0xe7, 0xce, 0x9c
.byte 0x39, 0x73, 0xe6, 0xcc, 0x98, 0x31, 0x62, 0xc5
.byte 0x8b, 0x16, 0x2d, 0x5a, 0xb4, 0x69, 0xd2, 0xa4
.byte 0x48, 0x91, 0x22, 0x45, 0x8a, 0x14, 0x29, 0x52
.byte 0xa5, 0x4a, 0x95, 0x2a, 0x54, 0xa9, 0x53, 0xa7
.byte 0x4e, 0x9d, 0x3b, 0x77, 0xee, 0xdd, 0xbb, 0x76
.byte 0xec, 0xd9, 0xb3, 0x67, 0xcf, 0x9e, 0x3d, 0x7b
.byte 0xf7, 0xef, 0xdf, 0xbf, 0x7e, 0xfd, 0xfa, 0xf4
.byte 0xe9, 0xd3, 0xa6, 0x4c, 0x99, 0x33, 0x66, 0xcd
.byte 0x9a, 0x35, 0x6a, 0xd4, 0xa8, 0x51, 0xa3, 0x46
.byte 0x8c, 0x18, 0x30, 0x60, 0xc1, 0x83, 0x07, 0x0e
.byte 0x1d, 0x3a, 0x75, 0xea, 0xd5, 0xaa, 0x55, 0xab
.byte 0x57, 0xaf, 0x5f, 0xbe, 0x7c, 0xf9, 0xf2, 0xe5
.byte 0xca, 0x94, 0x28, 0x50, 0xa1, 0x42, 0x84, 0x09
.byte 0x13, 0x27, 0x4f, 0x9f, 0x3f, 0x7f, 0xff, 0xfe
.byte 0xfc, 0xf8, 0xf0, 0xe1, 0xc2, 0x85, 0x0b, 0x17
.byte 0x2f, 0x5e, 0xbc, 0x78, 0xf1, 0xe3, 0xc6, 0x8d
.byte 0x1a, 0x34, 0x68, 0xd0, 0xa0, 0x40, 0x80, 0x00
#endif