; knot256.h
;
; **********************************************
; * KNOT: a family of bit-slice lightweight    *
; *       authenticated encryption algorithms  *
; *       and hash functions                   *
; *                                            *
; * Assembly implementation for 8-bit AVR CPU  *
; * Version 1.1 2020 by KNOT Team              *
; **********************************************
;
; Register aliases for the state bytes cached in registers by Permutation.
; Naming: xRk = byte k of state row R (rows 1..3; row 0 is handled through x0j).
; NOTE: keep comments off the define lines themselves, otherwise the comment
; text becomes part of the macro expansion.

; Row 1: bytes 0..7 in r0..r7, in natural order.
#define x10  r0
#define x11  r1
#define x12  r2
#define x13  r3
#define x14  r4
#define x15  r5
#define x16  r6
#define x17  r7

; Row 2: an intentional arrangement of registers to facilitate movw.
; Bytes k and k+4 share one even/odd register pair:
;   r9:r8 = x24:x20, r11:r10 = x25:x21, r13:r12 = x26:x22, r15:r14 = x27:x23
; so a one-byte rotation of the row takes three movw plus two mov fix-ups.
#define x20  r8
#define x21  r10
#define x22  r12
#define x23  r14
#define x24  r9
#define x25  r11
#define x26  r13
#define x27  r15

; Row 3: an intentional arrangement of registers to facilitate movw.
; The register pairs follow the cycle of a three-byte rotation:
;   r17:r16 = x34:x30, r19:r18 = x31:x35, r21:r20 = x36:x32, r23:r22 = x33:x37
; so that rotation also takes three movw plus two mov fix-ups.
#define x30  r16
#define x35  r18
#define x32  r20
#define x37  r22
#define x34  r17
#define x31  r19
#define x36  r21
#define x33  r23

; t0j / t1j: scratch registers; r25:r24 form a pair so movw can target them.
; x0j: holds the row-0 byte currently being processed (r27).
#define t0j  r24
#define t1j  r25
#define x0j  r27

#include "assist.h"

; Sbox i0, i1, i2, i3
; S-box step used by SubColumns in the round loop. The four register operands
; are combined with bitwise operations only (com/and/or/eor), so each of the
; eight bit positions is processed independently; all four operands are
; overwritten with their new values.
; Clobbers: t0j (scratch). The instruction order matters: several steps read
; an operand before a later step overwrites it.
.macro Sbox i0, i1, i2, i3
    mov  t0j,  \i1        ; t  = incoming i1 (i1 is overwritten below)
    com  \i0              ; i0 = NOT i0
    and  \i1,   \i0       ; i1 = i1 AND i0
    eor  \i1,   \i2       ; i1 = i1 XOR incoming i2
    or   \i2,   t0j       ; i2 = i2 OR incoming i1
    eor  \i0,   \i3       ; i0 = i0 XOR incoming i3
    eor  \i2,   \i0       ; i2 = i2 XOR i0            (final i2)
    eor  t0j,   \i3       ; t  = incoming i1 XOR incoming i3
    and  \i0,   \i1       ; i0 = i0 AND i1
    eor  \i3,   \i1       ; i3 = i3 XOR i1            (final i3)
    eor  \i0,   t0j       ; i0 = i0 XOR t             (final i0)
    and  t0j,  \i2        ; t  = t AND final i2
    eor  \i1,   t0j       ; i1 = i1 XOR t             (final i1)
.endm

; ---------------------------------------------------------------------------
; Permutation
;
; Applies rn rounds (AddRC, SubColumns, ShiftRows) to the state stored at
; SRAM_STATE, updating it in place.
;
; In:   rn    round count, copied into rcnt. The counter is decremented and
;             tested only after a round has run, so at least one round is
;             always executed.
;       AEDH  read only when both CRYPTO_AEAD and CRYPTO_HASH are defined:
;             bit 2 clear selects RC_LFSR6, bit 2 set selects RC_LFSR7.
; Out:  state at SRAM_STATE holds the permuted value.
;
; Register use inside the loop:
;       x0j        row-0 byte being processed (row 0 itself stays in SRAM)
;       x10..x17   row 1, x20..x27 row 2, x30..x37 row 3; loaded once before
;                  the loop and written back once after it
;       t0j, t1j   scratch
;       Y          SRAM pointer, Z flash pointer into the constant table
;       rcnt       rounds remaining
;
; PUSH_CONFLICT / POP_CONFLICT come from assist.h; presumably they save and
; restore the registers overwritten here (confirm there).
; NOTE(review): the code touches exactly 8 bytes per row, i.e. it assumes
; ROW_INBYTES is 8; confirm against assist.h.
; ---------------------------------------------------------------------------
Permutation:
    PUSH_CONFLICT
    mov  rcnt, rn

    ; Cache rows 1..3 (24 bytes following row 0) in registers.
    ldi  YH, hi8(SRAM_STATE + ROW_INBYTES)
    ldi  YL, lo8(SRAM_STATE + ROW_INBYTES)
    ld   x10, Y+
    ld   x11, Y+
    ld   x12, Y+
    ld   x13, Y+
    ld   x14, Y+
    ld   x15, Y+
    ld   x16, Y+
    ld   x17, Y+
    ld   x20, Y+
    ld   x21, Y+
    ld   x22, Y+
    ld   x23, Y+
    ld   x24, Y+
    ld   x25, Y+
    ld   x26, Y+
    ld   x27, Y+
    ld   x30, Y+
    ld   x31, Y+
    ld   x32, Y+
    ld   x33, Y+
    ld   x34, Y+
    ld   x35, Y+
    ld   x36, Y+
    ld   x37, Y+

    ; Point Z at the round-constant table for the active mode.
#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
    sbrc AEDH,  2 ; AEDH[2] = 0 for AEAD and AEDH[2] = 1 for HASH
    rjmp For_Hash
For_AEAD:
    ldi  ZL, lo8(RC_LFSR6)
    ldi  ZH, hi8(RC_LFSR6)
    rjmp round_loop_start
For_Hash:
    ldi  ZL, lo8(RC_LFSR7)
    ldi  ZH, hi8(RC_LFSR7)
#elif defined(CRYPTO_AEAD)
    ldi  ZL, lo8(RC_LFSR6)
    ldi  ZH, hi8(RC_LFSR6)
#else
    ldi  ZL, lo8(RC_LFSR7)
    ldi  ZH, hi8(RC_LFSR7)
#endif

round_loop_start:
    ; AddRC: XOR the next table byte into byte 0 of row 0.
    lpm  t0j,  Z+
    ldi  YH, hi8(SRAM_STATE)
    ldi  YL, lo8(SRAM_STATE)
    ld   x0j, Y
    eor  x0j, t0j

    ; SubColumns: one Sbox per byte column. Each row-0 byte is fetched from
    ; SRAM into x0j, substituted, and stored back; rows 1..3 stay in registers.
    Sbox x0j, x10, x20, x30
    st   Y+, x0j
    ld   x0j, Y
    Sbox x0j, x11, x21, x31
    st   Y+, x0j
    ld   x0j, Y
    Sbox x0j, x12, x22, x32
    st   Y+, x0j
    ld   x0j, Y
    Sbox x0j, x13, x23, x33
    st   Y+, x0j
    ld   x0j, Y
    Sbox x0j, x14, x24, x34
    st   Y+, x0j
    ld   x0j, Y
    Sbox x0j, x15, x25, x35
    st   Y+, x0j
    ld   x0j, Y
    Sbox x0j, x16, x26, x36
    st   Y+, x0j
    ld   x0j, Y
    Sbox x0j, x17, x27, x37
    st   Y, x0j

    ; ShiftRows
    ; Row 1 <<< 1: rotating a copy of x17 moves its bit 7 into carry, so the
    ; rol chain wraps that bit around into bit 0 of x10.
    mov  t0j, x17
    rol  t0j
    rol  x10
    rol  x11
    rol  x12
    rol  x13
    rol  x14
    rol  x15
    rol  x16
    rol  x17

    ; Row 2 <<< 8 (rotate by one byte)
    ; 7 6 5 4 3 2 1 0 => 6 5 4 3 2 1 0 7
    ; Byte-wise this is: t = x27, x27 = x26, x26 = x25, x25 = x24, x24 = x23,
    ;                    x23 = x22, x22 = x21, x21 = x20, x20 = t
    ; The register pairing lets movw move two of those bytes at a time.
    movw t0j, x23  ; t1j:t0j <= x27:x23
    movw x23, x22  ; x27:x23 <= x26:x22
    movw x22, x21  ; x26:x22 <= x25:x21
    movw x21, x20  ; x25:x21 <= x24:x20
    mov  x20, t1j  ; x20 <= old x27
    mov  x24, t0j  ; x24 <= old x23

    ; Row 3 <<< 25, done as <<< 1 followed by <<< 24.
    ; <<< 1 (same carry trick as row 1)
    mov  t0j, x37
    rol  t0j
    rol  x30
    rol  x31
    rol  x32
    rol  x33
    rol  x34
    rol  x35
    rol  x36
    rol  x37
    ; <<< 24 (rotate by three bytes)
    ; 7 6 5 4 3 2 1 0 => 4 3 2 1 0 7 6 5
    ; Byte-wise this is: t = x30, x30 = x35, x35 = x32, x32 = x37, x37 = x34,
    ;                    x34 = x31, x31 = x36, x36 = x33, x33 = t
    ; Register pairs: r17:r16 = x34:x30, r19:r18 = x31:x35,
    ;                 r21:r20 = x36:x32, r23:r22 = x33:x37
    movw t0j, x30 ; t1j:t0j <= x34:x30
    movw x30, x35 ; x34:x30 <= x31:x35
    movw x35, x32 ; x31:x35 <= x36:x32
    movw x32, x37 ; x36:x32 <= x33:x37
    mov  x37, t1j ; x37 <= old x34
    mov  x33, t0j ; x33 <= old x30

    ; Loop control. The body is about 160 one-word instructions: too far for
    ; a conditional branch (range is 64 words) but well within reach of the
    ; one-word relative jump, so the two-word absolute jump is not needed.
    dec  rcnt
    breq round_loop_end
    rjmp round_loop_start

round_loop_end:
    ; Write rows 1..3 back; row 0 was updated in SRAM during the rounds.
    ldi  YH, hi8(SRAM_STATE + ROW_INBYTES)
    ldi  YL, lo8(SRAM_STATE + ROW_INBYTES)
    st   Y+, x10
    st   Y+, x11
    st   Y+, x12
    st   Y+, x13
    st   Y+, x14
    st   Y+, x15
    st   Y+, x16
    st   Y+, x17
    st   Y+, x20
    st   Y+, x21
    st   Y+, x22
    st   Y+, x23
    st   Y+, x24
    st   Y+, x25
    st   Y+, x26
    st   Y+, x27
    st   Y+, x30
    st   Y+, x31
    st   Y+, x32
    st   Y+, x33
    st   Y+, x34
    st   Y+, x35
    st   Y+, x36
    st   Y+, x37

    POP_CONFLICT
    ret


.section .text

; Round-constant tables, read one byte per round through Z (lpm) by
; Permutation. Each table is written once and guarded by its own condition:
;   RC_LFSR6  emitted whenever CRYPTO_AEAD is defined
;   RC_LFSR7  emitted when CRYPTO_HASH is defined together with CRYPTO_AEAD,
;             or whenever CRYPTO_AEAD is not defined
; When both are emitted, RC_LFSR6 comes first.

#if defined(CRYPTO_AEAD)
; 64 bytes: 63 six-bit constants followed by a 0x00 entry.
RC_LFSR6:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x21, 0x03, 0x06
.byte 0x0c, 0x18, 0x31, 0x22, 0x05, 0x0a, 0x14, 0x29
.byte 0x13, 0x27, 0x0f, 0x1e, 0x3d, 0x3a, 0x34, 0x28
.byte 0x11, 0x23, 0x07, 0x0e, 0x1c, 0x39, 0x32, 0x24
.byte 0x09, 0x12, 0x25, 0x0b, 0x16, 0x2d, 0x1b, 0x37
.byte 0x2e, 0x1d, 0x3b, 0x36, 0x2c, 0x19, 0x33, 0x26
.byte 0x0d, 0x1a, 0x35, 0x2a, 0x15, 0x2b, 0x17, 0x2f
.byte 0x1f, 0x3f, 0x3e, 0x3c, 0x38, 0x30, 0x20, 0x00
#endif

#if defined(CRYPTO_HASH) || !defined(CRYPTO_AEAD)
; 128 bytes: 127 seven-bit constants followed by a 0x00 entry.
RC_LFSR7:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x41, 0x03
.byte 0x06, 0x0c, 0x18, 0x30, 0x61, 0x42, 0x05, 0x0a
.byte 0x14, 0x28, 0x51, 0x23, 0x47, 0x0f, 0x1e, 0x3c
.byte 0x79, 0x72, 0x64, 0x48, 0x11, 0x22, 0x45, 0x0b
.byte 0x16, 0x2c, 0x59, 0x33, 0x67, 0x4e, 0x1d, 0x3a
.byte 0x75, 0x6a, 0x54, 0x29, 0x53, 0x27, 0x4f, 0x1f
.byte 0x3e, 0x7d, 0x7a, 0x74, 0x68, 0x50, 0x21, 0x43
.byte 0x07, 0x0e, 0x1c, 0x38, 0x71, 0x62, 0x44, 0x09
.byte 0x12, 0x24, 0x49, 0x13, 0x26, 0x4d, 0x1b, 0x36
.byte 0x6d, 0x5a, 0x35, 0x6b, 0x56, 0x2d, 0x5b, 0x37
.byte 0x6f, 0x5e, 0x3d, 0x7b, 0x76, 0x6c, 0x58, 0x31
.byte 0x63, 0x46, 0x0d, 0x1a, 0x34, 0x69, 0x52, 0x25
.byte 0x4b, 0x17, 0x2e, 0x5d, 0x3b, 0x77, 0x6e, 0x5c
.byte 0x39, 0x73, 0x66, 0x4c, 0x19, 0x32, 0x65, 0x4a
.byte 0x15, 0x2a, 0x55, 0x2b, 0x57, 0x2f, 0x5f, 0x3f
.byte 0x7f, 0x7e, 0x7c, 0x78, 0x70, 0x60, 0x40, 0x00
#endif