; knot384.h
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.1 2020 by KNOT Team *
; **********************************************
;
; an intentional arrangement of registers to facilitate movw (bytes that move together in ShiftRows share an even/odd register pair)
#define x20 r0
#define x21 r2
#define x22 r4
#define x23 r6
#define x24 r8
#define x25 r10
#define x26 r1
#define x27 r3
#define x28 r5
#define x29 r7
#define x2a r9
#define x2b r11
; an intentional arrangement of registers to facilitate movw
#define x30 r22
#define x35 r20
#define x3a r18
#define x33 r16
#define x38 r14
#define x31 r12
#define x36 r23
#define x3b r21
#define x34 r19
#define x39 r17
#define x32 r15
#define x37 r13
#define t0j r24
#define t1j r25
#define x0j r26
#define x1j r27
#include "assist.h"
.macro Sbox i0, i1, i2, i3
ldi t0j, 0xFF
eor \i0, t0j
mov t0j, \i1
and \i1, \i0
eor \i1, \i2
or \i2, t0j
eor \i0, \i3
eor \i2, \i0
eor t0j, \i3
and \i0, \i1
eor \i3, \i1
eor \i0, t0j
and t0j, \i2
eor \i1, t0j
.endm
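; OneColumn i0, i1, i2, i3 -- process one byte-column of the state. The Row 0
; and Row 1 bytes are fetched from SRAM through Y, the Row 2 and Row 3 bytes
; arrive in the registers passed as i2/i3. After the S-box, Row 0 is written
; back and the Row 1 byte is rotated left through carry, so the twelve columns
; chain into the 96-bit Row 1 <<< 1 (the final carry is folded back after the
; column loop).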
.macro OneColumn i0, i1, i2, i3
ld \i0, Y
ldd \i1, Y + ROW_INBYTES
Sbox \i0, \i1, \i2, \i3
st Y+, \i0
rol \i1 ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES -1, \i1
.endm
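; Permutation -- apply rn rounds of the KNOT-384 permutation to the 48-byte
; state at SRAM_STATE (four rows of ROW_INBYTES = 12 bytes). Rows 0 and 1 stay
; in SRAM and are walked with Y; Rows 2 and 3 are kept in registers throughout.
; rn, rcnt and the PUSH_CONFLICT/POP_CONFLICT macros are defined outside this
; file.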
Permutation:
PUSH_CONFLICT
mov rcnt, rn
ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES)
ld x20, Y+
ld x21, Y+
ld x22, Y+
ld x23, Y+
ld x24, Y+
ld x25, Y+
ld x26, Y+
ld x27, Y+
ld x28, Y+
ld x29, Y+
ld x2a, Y+
ld x2b, Y+
ld x30, Y+
ld x31, Y+
ld x32, Y+
ld x33, Y+
ld x34, Y+
ld x35, Y+
ld x36, Y+
ld x37, Y+
ld x38, Y+
ld x39, Y+
ld x3a, Y+
ld x3b, Y+
ldi ZL, lo8(RC_LFSR7)
ldi ZH, hi8(RC_LFSR7)
round_loop_start:
; AddRC
lpm t0j, Z+
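; the first byte-column is handled inline rather than through OneColumn: the
; round constant lands in Row 0 byte 0, and the Row 1 <<< 1 chain starts with
; lsl instead of rol (bit 0 of that byte is patched with the final carry after
; the last column)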
ldi YH, hi8(SRAM_STATE)
ldi YL, lo8(SRAM_STATE)
ld x0j, Y
eor x0j, t0j
ldd x1j, Y + ROW_INBYTES
Sbox x0j, x1j, x20, x30
st Y+, x0j
lsl x1j ; ShiftRows -- Row 1 <<< 1
std Y + ROW_INBYTES -1, x1j
OneColumn x0j, x1j, x21, x31
OneColumn x0j, x1j, x22, x32
OneColumn x0j, x1j, x23, x33
OneColumn x0j, x1j, x24, x34
OneColumn x0j, x1j, x25, x35
OneColumn x0j, x1j, x26, x36
OneColumn x0j, x1j, x27, x37
OneColumn x0j, x1j, x28, x38
OneColumn x0j, x1j, x29, x39
OneColumn x0j, x1j, x2a, x3a
OneColumn x0j, x1j, x2b, x3b
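; fold the carry produced by the last rol back into bit 0 of Row 1 byte 0,
; completing the 96-bit Row 1 <<< 1 (eor clears t0j without touching carry)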
ld x1j, Y
eor t0j, t0j
adc x1j, t0j
st Y, x1j
; b a 9 8 7 6 5 4 3 2 1 0
; -- -- -- -- -- -- -- -- -- -- -- x- 0
; -- -- -- -- -- -- -- -- -- -- -- x' 0
; -- -- -- -- -- -- -- -- -- -- x- -- 1
; -- -- -- -- x' -- -- -- -- -- -- -- 7
; 4 3 2 1 0 b a 9 8 7 6 5
; ShiftRows -- the last two rows
; <<< 8
; b a 9 8 7 6 5 4 3 2 1 0 => a 9 8 7 6 5 4 3 2 1 0 b
movw t0j, x25 ; t1j:t0j <= x2b:x25
movw x25, x24 ; x2b:x25 <= x2a:x24
movw x24, x23 ; x2a:x24 <= x29:x23
movw x23, x22 ; x29:x23 <= x28:x22
movw x22, x21 ; x28:x22 <= x27:x21
movw x21, x20 ; x27:x21 <= x26:x20
mov x26, t0j ; x26 <= x25
mov x20, t1j ; x20 <= x2b
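; Row 3 <<< 55, realized as >>> 1 over the 96-bit row followed by a 7-byte
; (<<< 56) register permutation done with movw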
; >>> 1
mov t0j, x3b
ror t0j
ror x3a
ror x39
ror x38
ror x37
ror x36
ror x35
ror x34
ror x33
ror x32
ror x31
ror x30
ror x3b
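; (the mov/ror of the x3b copy above only seeds carry with bit 0 of x3b;
; x3b itself is rotated last, picking up the carry that comes out of x30)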
; <<< 56
; b a 9 8 7 6 5 4 3 2 1 0 => 4 3 2 1 0 b a 9 8 7 6 5
; mov x3j, x30
; mov x30, x35
; mov x35, x3a
; mov x3a, x33
; mov x33, x38
; mov x38, x31
; mov x31, x36
; mov x36, x3b
; mov x3b, x34
; mov x34, x39
; mov x39, x32
; mov x32, x37
; mov x37, x3j
; an intentional arrangement of registers to facilitate movw
; x30 r22
; x35 r20
; x3a r18
; x33 r16
; x38 r14
; x31 r12
; x36 r23
; x3b r21
; x34 r19
; x39 r17
; x32 r15
; x37 r13
movw t0j, x30 ; t1j:t0j <= x36:x30
movw x30, x35 ; x36:x30 <= x3b:x35
movw x35, x3a ; x3b:x35 <= x34:x3a
movw x3a, x33 ; x34:x3a <= x39:x33
movw x33, x38 ; x39:x33 <= x32:x38
movw x38, x31 ; x32:x38 <= x37:x31
mov x31, t1j ; x31 <= x36
mov x37, t0j ; x37 <= x30
dec rcnt
breq round_loop_end
jmp round_loop_start
round_loop_end:
ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES)
st Y+, x20
st Y+, x21
st Y+, x22
st Y+, x23
st Y+, x24
st Y+, x25
st Y+, x26
st Y+, x27
st Y+, x28
st Y+, x29
st Y+, x2a
st Y+, x2b
st Y+, x30
st Y+, x31
st Y+, x32
st Y+, x33
st Y+, x34
st Y+, x35
st Y+, x36
st Y+, x37
st Y+, x38
st Y+, x39
st Y+, x3a
st Y+, x3b
POP_CONFLICT
ret
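; round-constant table: successive states of the 7-bit LFSR used by this
; permutation, starting from 0x01. The entries are consistent with the
; recurrence
;   rc = ((rc << 1) | (((rc >> 6) ^ (rc >> 5)) & 1)) & 0x7f
; (an observation from the table itself, not a statement of the specification);
; the trailing 0x00 appears to pad the table to 128 bytes.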
RC_LFSR7:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03
.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a
.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c
.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b
.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a
.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f
.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43
.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09
.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36
.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37
.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31
.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25
.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c
.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a
.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f
.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00