; knot512.h
;
; **********************************************
; * KNOT: a family of bit-slice lightweight    *
; *       authenticated encryption algorithms  *
; *       and hash functions                   *
; *                                            *
; * Assembly implementation for 8-bit AVR CPU  *
; * Version 1.1 2020 by KNOT Team              *
; **********************************************
;
; ----------------------------------------------------------------------
; Register aliases.
; Comments are kept on their own lines on purpose: text placed after a
; define on the same line would become part of the macro expansion.
; ----------------------------------------------------------------------

; Row 2: only the even-indexed bytes get a register (x2N = byte N of row 2,
; N in hex).  Bytes 0/8, 2/a, 4/c and 6/e share the register pairs r1:r0,
; r3:r2, r5:r4 and r7:r6, so a single movw moves two of them.
#define x20  r0
#define x22  r2
#define x24  r4
#define x26  r6
#define x28  r1
#define x2a  r3
#define x2c  r5
#define x2e  r7

; Row 3: all sixteen bytes get a register (x3N = byte N of row 3, N in hex).
; Walking the even registers r8, r10, ... r22 visits bytes 0, d, a, 7, 4, 1,
; e, b; walking the odd registers r9, r11, ... r23 visits bytes 8, 5, 2, f,
; c, 9, 6, 3.  Each step is three bytes back, so a chain of movw between
; neighbouring pairs shifts every byte by three positions.
#define x30  r8
#define x3d  r10
#define x3a  r12
#define x37  r14
#define x34  r16
#define x31  r18
#define x3e  r20
#define x3b  r22
#define x38  r9
#define x35  r11
#define x32  r13
#define x3f  r15
#define x3c  r17
#define x39  r19
#define x36  r21
#define x33  r23

; Scratch registers.
;   t0j / t1j  form the pair r25:r24 used as the temporary of the movw chains
;   x0j        row-0 byte of the column being processed; it is the SAME
;              register as t1j (r25), so the two must never be live together
;   x1j        row-1 byte of the column being processed (r27, high half of X)
;   x2j        row-2 byte of the odd column being processed (r26, low half of X)
#define t0j  r24
#define t1j  r25
#define x0j  r25
#define x1j  r27
#define x2j  r26

#include "assist.h"

; ----------------------------------------------------------------------
; Sbox i0, i1, i2, i3
;   Bit-sliced S-box over four byte registers, computed in place: every
;   bit position of the four operands is treated as one 4-bit input, and
;   the four result bytes are left in the same four registers.
;   The call sites in this file pass the row-0, row-1, row-2 and row-3
;   byte of one state column, in that order.
;
;   Clobbers: t0j (must be a register ldi accepts, i.e. r16..r31).
;   Flags:    only ldi / mov / eor / and / or are used; none of them
;             changes the carry flag.  The callers depend on this, because
;             they keep a carry alive from one lsl / rol to the next rol
;             across an Sbox invocation.  This is presumably why the
;             complement is done with ldi 0xFF + eor instead of com,
;             which would set carry.
;
;   In the line comments a, b, c, d are the values of i0..i3 on entry.
; ----------------------------------------------------------------------
.macro Sbox i0, i1, i2, i3
    ldi  t0j,  0xFF
    eor  \i0,  t0j               ; i0 = ~a
    mov  t0j,  \i1               ; t  = b
    and  \i1,   \i0              ; i1 = b & ~a
    eor  \i1,   \i2              ; i1 = (b & ~a) ^ c
    or   \i2,   t0j              ; i2 = c | b
    eor  \i0,   \i3              ; i0 = ~a ^ d
    eor  \i2,   \i0              ; i2 = (c | b) ^ ~a ^ d         -- final i2
    eor  t0j,   \i3              ; t  = b ^ d
    and  \i0,   \i1              ; i0 = (~a ^ d) & i1
    eor  \i3,   \i1              ; i3 = d ^ i1                   -- final i3
    eor  \i0,   t0j              ; i0 = i0 ^ b ^ d               -- final i0
    and  t0j,  \i2               ; t  = (b ^ d) & i2
    eor  \i1,   t0j              ; i1 = i1 ^ t                   -- final i1
.endm

; ----------------------------------------------------------------------
; TwoColumns i2_e, i3_e, i3_o
;   Processes two neighbouring state columns: the one Y points at (even
;   index) and the one after it (odd index).
;     i2_e   register holding the row-2 byte of the even column
;     i3_e   register holding the row-3 byte of the even column
;     i3_o   register holding the row-3 byte of the odd column
;
;   Entry: Y points at the row-0 byte of the even column; x2j holds the
;          row-2 byte of the odd column; the carry flag holds the bit
;          shifted out of the previous column's row-1 byte.
;   Exit:  Y has advanced by two; x2j holds the row-2 byte found two
;          columns further on (for the next odd column); the carry flag
;          holds the bit shifted out of the odd column's row-1 byte.
;   Clobbers: x0j, x1j, t0j.
;
;   For each column the row-0 and row-1 bytes are read from SRAM, run
;   through Sbox together with the row-2 / row-3 registers, and written
;   back; the row-1 byte is rotated left through carry before the store.
; ----------------------------------------------------------------------
.macro TwoColumns i2_e, i3_e, i3_o
    ; column 2i
    ld   x0j, Y
    ldd  x1j, Y + ROW_INBYTES
    Sbox x0j, x1j, \i2_e, \i3_e
    st   Y+, x0j
    rol  x1j                     ; ShiftRows -- Row 1 <<< 1
    ; Y was post-incremented by the st above, hence the - 1
    std  Y + ROW_INBYTES - 1, x1j

    ; column 2i+1
    ld   x0j, Y
    ldd  x1j, Y + ROW_INBYTES
    Sbox x0j, x1j, x2j, \i3_o
    st   Y+, x0j
    rol  x1j                     ; ShiftRows -- Row 1 <<< 1
    std  Y + ROW_INBYTES - 1, x1j
    ; Y now points at column 2i+2, so the slot addressed below is the
    ; row-2 byte of column 2i+3.  Its old value is saved, the freshly
    ; computed odd-column byte is written there (a move of two byte
    ; positions), and the saved value becomes x2j for the next odd column.
    ldd  t0j, Y + 2 * ROW_INBYTES + 1
    std  Y + 2 * ROW_INBYTES + 1, x2j
    mov  x2j, t0j
.endm

; ----------------------------------------------------------------------
; Permutation
;   Runs the round function rn times over the state stored at SRAM_STATE.
;
;   In:  rn          number of rounds (copied into rcnt)
;        SRAM_STATE  four rows of ROW_INBYTES bytes each; sixteen columns
;                    are processed per round
;        AEDH        bit 2 selects the round-constant table when both
;                    CRYPTO_AEAD and CRYPTO_HASH are defined
;   Out: the state at SRAM_STATE is updated in place.
;
;   Working set while the rounds run:
;     rows 0 and 1         stay in SRAM and are read / written through Y
;     row 2, even bytes    x20 .. x2e
;     row 2, odd bytes     stay in SRAM and pass through x2j one at a time
;     row 3, all bytes     x30 .. x3f
;     Z                    walks the round-constant table in flash
;
;   PUSH_CONFLICT / POP_CONFLICT, rcnt, rn and AEDH are not defined in this
;   file (presumably in assist.h).  The two macros presumably save and
;   restore the registers overwritten here -- verify against assist.h.
;
;   NOTE(review): rn is assumed to be non-zero.  With rn = 0 the dec at the
;   loop tail wraps to 0xFF before the breq test, so the loop would keep
;   going and lpm would read beyond the table -- confirm with the callers.
; ----------------------------------------------------------------------
Permutation:
    PUSH_CONFLICT
    mov rcnt, rn
    ; The counter is parked on the stack while a round executes; it is
    ; popped, decremented and pushed again at the loop tail.
    push rcnt

    ; Y -> first byte of row 2
    ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES)
    ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES)

    ; Load the even-indexed bytes of row 2 into their registers.
    ldd x20, Y + 0x00
    ldd x22, Y + 0x02
    ldd x24, Y + 0x04
    ldd x26, Y + 0x06
    ldd x28, Y + 0x08
    ldd x2a, Y + 0x0a
    ldd x2c, Y + 0x0c
    ldd x2e, Y + 0x0e

    ; Y -> first byte of row 3
    adiw YL, ROW_INBYTES

    ; Load all sixteen bytes of row 3 (x3N receives byte N, N in hex).
    ld  x30, Y+
    ld  x31, Y+
    ld  x32, Y+
    ld  x33, Y+
    ld  x34, Y+
    ld  x35, Y+
    ld  x36, Y+
    ld  x37, Y+
    ld  x38, Y+
    ld  x39, Y+
    ld  x3a, Y+
    ld  x3b, Y+
    ld  x3c, Y+
    ld  x3d, Y+
    ld  x3e, Y+
    ld  x3f, Y+

    ; Point Z at the round-constant table: RC_LFSR7 for AEAD, RC_LFSR8 for
    ; HASH.  When both are built in, the choice is made at run time.
#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
    sbrc AEDH,  2 ; AEDH[2] = 0 for AEAD and AEDH[2] = 1 for HASH
    rjmp For_Hash
For_AEAD:
    ldi  ZL, lo8(RC_LFSR7)
    ldi  ZH, hi8(RC_LFSR7)
    rjmp round_loop_start
For_Hash:
    ldi  ZL, lo8(RC_LFSR8)
    ldi  ZH, hi8(RC_LFSR8)
#elif defined(CRYPTO_AEAD)
    ldi  ZL, lo8(RC_LFSR7)
    ldi  ZH, hi8(RC_LFSR7)
#else
    ldi  ZL, lo8(RC_LFSR8)
    ldi  ZH, hi8(RC_LFSR8)
#endif


round_loop_start:
	; AddRC: fetch the constant of this round from flash; Z moves on to
	; the next table entry for the following round.
	lpm  t0j,  Z+
    ; Y -> row 0, column 0
    ldi  YH, hi8(SRAM_STATE)
    ldi  YL, lo8(SRAM_STATE)

    ; column 0
    ; The round constant is XORed into the row-0 byte of this column only.
    ld   x0j, Y
    eor  x0j, t0j
    ldd  x1j, Y + ROW_INBYTES
    Sbox x0j, x1j, x20, x30
    st   Y+, x0j
    ; lsl (not rol) starts the 1-bit rotation of row 1: bit 0 is cleared
    ; here and is filled in after column 15.  The bit shifted out stays in
    ; carry for the rol of the next column; nothing in between touches
    ; the carry flag.
    lsl  x1j                     ; ShiftRows -- Row 1 <<< 1
    ; Y was post-incremented by the st above, hence the - 1
    std  Y + ROW_INBYTES - 1, x1j

    ; column 1
    ld   x0j, Y
    ldd  x1j, Y + ROW_INBYTES
    ; first odd column: its row-2 byte has to be fetched from SRAM
    ldd  x2j, Y + 2 * ROW_INBYTES
    Sbox x0j, x1j, x2j, x31
    st   Y+, x0j
    rol  x1j                     ; ShiftRows -- Row 1 <<< 1
    std  Y + ROW_INBYTES - 1, x1j
    ; Y now points at column 2: save the old row-2 byte of column 3, put
    ; the new column-1 byte in its place, and keep the saved byte in x2j
    ; for column 3.
    ldd  t0j, Y + 2 * ROW_INBYTES + 1
    std  Y + 2 * ROW_INBYTES + 1, x2j
    mov  x2j, t0j

    ; column 2, 3
    TwoColumns x22, x32, x33
    ; column 4, 5
    TwoColumns x24, x34, x35
    ; column 6, 7
    TwoColumns x26, x36, x37
    ; column 8, 9
    TwoColumns x28, x38, x39
    ; column 10, 11
    TwoColumns x2a, x3a, x3b
    ; column 12, 13
    TwoColumns x2c, x3c, x3d

    ; column 14
    ld   x0j, Y
    ldd  x1j, Y + ROW_INBYTES
    Sbox x0j, x1j, x2e, x3e
    st   Y+, x0j
    rol  x1j                     ; ShiftRows -- Row 1 <<< 1
    std  Y + ROW_INBYTES - 1, x1j

    ; column 15
    ; x2j already holds the old row-2 byte of column 15 (saved while
    ; columns 12/13 were processed).
    ld   x0j, Y
    ldd  x1j, Y + ROW_INBYTES
    Sbox x0j, x1j, x2j, x3f
    st   Y+, x0j
    rol  x1j                     ; ShiftRows -- Row 1 <<< 1
    std  Y + ROW_INBYTES - 1, x1j

    ; Sixteen post-increments later Y is SRAM_STATE + 16.  The byte read
    ; here is byte 0 of row 1 (this relies on ROW_INBYTES being 16 -- the
    ; value is defined outside this file).  Adding the carry left by the
    ; last rol sets its bit 0, which closes the 1-bit rotation of row 1.
    ld  x1j, Y
    ; t0j = 0; eor leaves the carry flag alone
    eor t0j, t0j
    adc x1j, t0j
    st  Y,   x1j
    ; The new row-2 byte of column 15 wraps around to the row-2 slot of
    ; column 1 (again a move of two byte positions).
    std  Y + ROW_INBYTES + 1, x2j

    ; Row 2, even bytes: move each register value two byte positions on.
    ;  f  e  d  c  b  a  9  8  7  6  5  4  3  2  1  0
    ; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x- 0
    ; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x' 0
    ; -- -- -- -- -- -- -- -- -- -- -- -- -- x- -- -- 2
    ; -- -- -- -- -- -- -- -- -- -- -- -- x' -- -- -- 3
    ;  c  b  a  9  8  7  6  5  4  3  2  1  0  f  e  d
    ; x2e x2c x2a x28 x26 x24 x22 x20 => x2c x2a x28 x26 x24 x22 x20 x2e
    ; Byte-wise equivalent of the movw sequence below:
    ;mov  t0j, x2e
    ;mov  x2e, x2c
    ;mov  x2c, x2a
    ;mov  x2a, x28
    ;mov  x28, x26
    ;mov  x26, x24
    ;mov  x24, x22
    ;mov  x22, x20
    ;mov  x20, t0j
    ; The registers are deliberately paired so that movw can move two
    ; bytes per instruction.
	movw t0j, x26  ; t1j:t0j <= x2e:x26
	movw x26, x24  ; x2e:x26 <= x2c:x24
	movw x24, x22  ; x2c:x24 <= x2a:x22
	movw x22, x20  ; x2a:x22 <= x28:x20
	mov  x20, t1j  ; x20 <= t1j
	mov  x28, t0j  ; x28 <= t0j

    ; Row 3: rotate left by one bit, then by three bytes.
    ; <<< 1
    ; The first rol works on a copy of x3f and only serves to place bit 7
    ; of byte 15 in carry, so that it enters bit 0 of byte 0.
    mov  t0j, x3f
    rol  t0j
    rol  x30
    rol  x31
    rol  x32
    rol  x33
    rol  x34
    rol  x35
    rol  x36
    rol  x37
    rol  x38
    rol  x39
    rol  x3a
    rol  x3b
    rol  x3c
    rol  x3d
    rol  x3e
    rol  x3f
    ; <<< 24
    ; f  e  d  c  b  a  9  8  7  6  5  4  3  2  1  0 =>
    ; c  b  a  9  8  7  6  5  4  3  2  1  0  f  e  d
    ; Byte-wise equivalent of the movw sequence below:
    ; mov  x3j, x30
    ; mov  x30, x3d
    ; mov  x3d, x3a
    ; mov  x3a, x37
    ; mov  x37, x34
    ; mov  x34, x31
    ; mov  x31, x3e
    ; mov  x3e, x3b
    ; mov  x3b, x38
    ; mov  x38, x35
    ; mov  x35, x32
    ; mov  x32, x3f
    ; mov  x3f, x3c
    ; mov  x3c, x39
    ; mov  x39, x36
    ; mov  x36, x33
    ; mov  x33, x3j
    ; The registers are deliberately paired so that movw can move two
    ; bytes per instruction:
    ; x30  r8
    ; x3d  r10
    ; x3a  r12
    ; x37  r14
    ; x34  r16
    ; x31  r18
    ; x3e  r20
    ; x3b  r22
    ; x38  r9
    ; x35  r11
    ; x32  r13
    ; x3f  r15
    ; x3c  r17
    ; x39  r19
    ; x36  r21
    ; x33  r23
    movw t0j, x30  ; t1j:t0j <= x38:x30
    movw x30, x3d  ; x38:x30 <= x35:x3d
    movw x3d, x3a  ; x35:x3d <= x32:x3a
    movw x3a, x37  ; x32:x3a <= x3f:x37
    movw x37, x34  ; x3f:x37 <= x3c:x34
    movw x34, x31  ; x3c:x34 <= x39:x31
    movw x31, x3e  ; x39:x31 <= x36:x3e
    movw x3e, x3b  ; x36:x3e <= x33:x3b
    mov  x3b, t1j  ; x3b <= x38
    mov  x33, t0j  ; x33 <= x30

    ; Loop control: the counter is fetched from the stack, decremented and
    ; parked again.  push does not change the flags, so breq still tests
    ; the zero flag produced by dec.
    pop rcnt
    dec rcnt
    push rcnt
    breq round_loop_end
    rjmp round_loop_start

round_loop_end:
    ; drop the parked counter to rebalance the stack
    pop rcnt

    ; Write the register-resident part of the state back to SRAM: the even
    ; bytes of row 2, then all of row 3.  Rows 0 and 1 and the odd bytes of
    ; row 2 were stored during the rounds.
    ldi YH, hi8(SRAM_STATE + 2 * ROW_INBYTES)
    ldi YL, lo8(SRAM_STATE + 2 * ROW_INBYTES)
    std Y + 0x00, x20
    std Y + 0x02, x22
    std Y + 0x04, x24
    std Y + 0x06, x26
    std Y + 0x08, x28
    std Y + 0x0a, x2a
    std Y + 0x0c, x2c
    std Y + 0x0e, x2e
    ; Y -> first byte of row 3
    adiw YL, ROW_INBYTES
    st   Y+, x30
    st   Y+, x31
    st   Y+, x32
    st   Y+, x33
    st   Y+, x34
    st   Y+, x35
    st   Y+, x36
    st   Y+, x37
    st   Y+, x38
    st   Y+, x39
    st   Y+, x3a
    st   Y+, x3b
    st   Y+, x3c
    st   Y+, x3d
    st   Y+, x3e
    st   Y+, x3f

    POP_CONFLICT
ret

; ----------------------------------------------------------------------
; Round-constant tables.  They are placed in program memory and read one
; byte per round with lpm through Z by Permutation.
;
;   RC_LFSR7  128 bytes, referenced when CRYPTO_AEAD is defined
;   RC_LFSR8  256 bytes, referenced when CRYPTO_HASH is defined, or when
;             CRYPTO_AEAD is not defined
;
; Each table is written out exactly once.  The two guards below emit the
; same labels, in the same order, as the selection logic in Permutation
; expects for every combination of CRYPTO_AEAD and CRYPTO_HASH.
; (The names suggest the values are the successive states of a 7-bit and
; an 8-bit LFSR -- check against the KNOT specification before editing.)
; ----------------------------------------------------------------------
.section .text
#if defined(CRYPTO_AEAD)
RC_LFSR7:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03
.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a
.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c
.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b
.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a
.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f
.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43
.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09
.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36
.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37
.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31
.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25
.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c
.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a
.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f
.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00
#endif
#if defined(CRYPTO_HASH) || !defined(CRYPTO_AEAD)
RC_LFSR8:
.byte 0x01, 0x02, 0x04, 0x08, 0x11, 0x23, 0x47, 0x8e
.byte 0x1c, 0x38, 0x71, 0xe2, 0xc4, 0x89, 0x12, 0x25
.byte 0x4b, 0x97, 0x2e, 0x5c, 0xb8, 0x70, 0xe0, 0xc0
.byte 0x81, 0x03, 0x06, 0x0c, 0x19, 0x32, 0x64, 0xc9
.byte 0x92, 0x24, 0x49, 0x93, 0x26, 0x4d, 0x9b, 0x37
.byte 0x6e, 0xdc, 0xb9, 0x72, 0xe4, 0xc8, 0x90, 0x20
.byte 0x41, 0x82, 0x05, 0x0a, 0x15, 0x2b, 0x56, 0xad
.byte 0x5b, 0xb6, 0x6d, 0xda, 0xb5, 0x6b, 0xd6, 0xac
.byte 0x59, 0xb2, 0x65, 0xcb, 0x96, 0x2c, 0x58, 0xb0
.byte 0x61, 0xc3, 0x87, 0x0f, 0x1f, 0x3e, 0x7d, 0xfb
.byte 0xf6, 0xed, 0xdb, 0xb7, 0x6f, 0xde, 0xbd, 0x7a
.byte 0xf5, 0xeb, 0xd7, 0xae, 0x5d, 0xba, 0x74, 0xe8
.byte 0xd1, 0xa2, 0x44, 0x88, 0x10, 0x21, 0x43, 0x86
.byte 0x0d, 0x1b, 0x36, 0x6c, 0xd8, 0xb1, 0x63, 0xc7
.byte 0x8f, 0x1e, 0x3c, 0x79, 0xf3, 0xe7, 0xce, 0x9c
.byte 0x39, 0x73, 0xe6, 0xcc, 0x98, 0x31, 0x62, 0xc5
.byte 0x8b, 0x16, 0x2d, 0x5a, 0xb4, 0x69, 0xd2, 0xa4
.byte 0x48, 0x91, 0x22, 0x45, 0x8a, 0x14, 0x29, 0x52
.byte 0xa5, 0x4a, 0x95, 0x2a, 0x54, 0xa9, 0x53, 0xa7
.byte 0x4e, 0x9d, 0x3b, 0x77, 0xee, 0xdd, 0xbb, 0x76
.byte 0xec, 0xd9, 0xb3, 0x67, 0xcf, 0x9e, 0x3d, 0x7b
.byte 0xf7, 0xef, 0xdf, 0xbf, 0x7e, 0xfd, 0xfa, 0xf4
.byte 0xe9, 0xd3, 0xa6, 0x4c, 0x99, 0x33, 0x66, 0xcd
.byte 0x9a, 0x35, 0x6a, 0xd4, 0xa8, 0x51, 0xa3, 0x46
.byte 0x8c, 0x18, 0x30, 0x60, 0xc1, 0x83, 0x07, 0x0e
.byte 0x1d, 0x3a, 0x75, 0xea, 0xd5, 0xaa, 0x55, 0xab
.byte 0x57, 0xaf, 0x5f, 0xbe, 0x7c, 0xf9, 0xf2, 0xe5
.byte 0xca, 0x94, 0x28, 0x50, 0xa1, 0x42, 0x84, 0x09
.byte 0x13, 0x27, 0x4f, 0x9f, 0x3f, 0x7f, 0xff, 0xfe
.byte 0xfc, 0xf8, 0xf0, 0xe1, 0xc2, 0x85, 0x0b, 0x17
.byte 0x2f, 0x5e, 0xbc, 0x78, 0xf1, 0xe3, 0xc6, 0x8d
.byte 0x1a, 0x34, 0x68, 0xd0, 0xa0, 0x40, 0x80, 0x00
#endif