knot512.h
;
; **********************************************
; * KNOT: a family of bit-slice lightweight    *
; *       authenticated encryption algorithms  *
; *       and hash functions                   *
; *                                            *
; * Assembly implementation for 8-bit AVR CPU  *
; * Version 1.0 2020 by KNOT Team              *
; **********************************************
;
#include "assist.h"
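;
; Permutation: applies 'rn' rounds of the KNOT permutation to the bit-sliced
; state stored at SRAM_STATE (4 rows of ROW_INBYTES bytes; 16 bytes per row
; for the 512-bit state). Rows 0..2 are processed in SRAM column by column,
; while row 3 is kept in the registers x30..x3f for the whole round loop.
; The register aliases (rn, rc, rcnt, ccnt, x0j..x3j, t1j, t2j, tmp0, AEDH)
; and the PUSH_CONFLICT/POP_CONFLICT, Sbox and LFSR macros are expected to be
; provided by assist.h.
;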

Permutation:
    PUSH_CONFLICT
    mov rcnt, rn

    ldi rc, 0x01 ; initial round constant
    ; keep all 16 bytes of row 3 in registers for the whole round loop
    ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES)
    ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES)
    ld  x30, Y+
    ld  x31, Y+
    ld  x32, Y+
    ld  x33, Y+
    ld  x34, Y+
    ld  x35, Y+
    ld  x36, Y+
    ld  x37, Y+
    ld  x38, Y+
    ld  x39, Y+
    ld  x3a, Y+
    ld  x3b, Y+
    ld  x3c, Y+
    ld  x3d, Y+
    ld  x3e, Y+
    ld  x3f, Y+

round_loop_start:
    rjmp AddRC_SubColumns_Start

load_columns_table:
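; Computed-jump dispatch table: Z points here and 'ijmp' selects one entry per
; column; after each column Z is advanced by one word (adiw ZL, 1) so the next
; ijmp lands on the next entry. Each load_columnN handler writes the S-boxed
; previous row-3 byte back from x3j and loads the current row-3 byte into x3j;
; the 17th entry leaves the column loop via amend_shiftRow.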
    rjmp load_column0
    rjmp load_column1
    rjmp load_column2
    rjmp load_column3
    rjmp load_column4
    rjmp load_column5
    rjmp load_column6
    rjmp load_column7
    rjmp load_column8
    rjmp load_column9
    rjmp load_columna
    rjmp load_columnb
    rjmp load_columnc
    rjmp load_columnd
    rjmp load_columne
    rjmp load_columnf
    rjmp amend_shiftRow

load_column0:
    mov  x3j, x30
    rjmp Sbox_one_column
load_column1:
    mov  x30, x3j
    mov  x3j, x31
    rjmp Sbox_one_column
load_column2:
    mov  x31, x3j
    mov  x3j, x32
    rjmp Sbox_one_column
load_column3:
    mov  x32, x3j
    mov  x3j, x33
    rjmp Sbox_one_column
load_column4:
    mov  x33, x3j
    mov  x3j, x34
    rjmp Sbox_one_column
load_column5:
    mov  x34, x3j
    mov  x3j, x35
    rjmp Sbox_one_column
load_column6:
    mov  x35, x3j
    mov  x3j, x36
    rjmp Sbox_one_column
load_column7:
    mov  x36, x3j
    mov  x3j, x37
    rjmp Sbox_one_column
load_column8:
    mov  x37, x3j
    mov  x3j, x38
    rjmp Sbox_one_column
load_column9:
    mov  x38, x3j
    mov  x3j, x39
    rjmp Sbox_one_column
load_columna:
    mov  x39, x3j
    mov  x3j, x3a
    rjmp Sbox_one_column
load_columnb:
    mov  x3a, x3j
    mov  x3j, x3b
    rjmp Sbox_one_column
load_columnc:
    mov  x3b, x3j
    mov  x3j, x3c
    rjmp Sbox_one_column
load_columnd:
    mov  x3c, x3j
    mov  x3j, x3d
    rjmp Sbox_one_column
load_columne:
    mov  x3d, x3j
    mov  x3j, x3e
    rjmp Sbox_one_column
load_columnf:
    mov  x3e, x3j
    mov  x3j, x3f
    rjmp Sbox_one_column

#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
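; Round-constant generator: when both CRYPTO_AEAD and CRYPTO_HASH are compiled
; in, the AEDH flag selects at run time between LFSR7_MACRO (AEAD) and
; LFSR8_MACRO (hashing); per their names these are 7- and 8-bit round-constant
; LFSRs, and both macros are assumed to be defined in assist.h.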
LFSR_table:
    rjmp LFSR7
    rjmp LFSR8
LFSR7:
    LFSR7_MACRO
    rjmp LFSR_DONE
LFSR8:
    LFSR8_MACRO
    rjmp LFSR_DONE
#endif

;;;;;;;;;;;;;;;;;;;;;;;; Real Start
AddRC_SubColumns_Start:
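    ; AddRoundConstant: XOR rc into the low byte of row 0; the LFSR step below
    ; then advances rc so it already holds the constant for the next round.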
    ldi  YH, hi8(SRAM_STATE)
    ldi  YL, lo8(SRAM_STATE)
    clr  ccnt
    ld   x0j, Y
    eor  x0j, rc

#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
    ldi  ZL, pm_lo8(LFSR_table)
    ldi  ZH, pm_hi8(LFSR_table)
    sbrc AEDH, 2 ; AEDH[2] = 0 for AEAD and AEDH[2] = 1 for HASH
    adiw ZL, 1
    ijmp
LFSR_DONE:
#elif defined(CRYPTO_AEAD)
    LFSR7_MACRO ; only AEAD
#else
    LFSR8_MACRO ; only HASH
#endif

    ldd  x1j, Y + ROW_INBYTES
    ldd  x2j, Y + 2 * ROW_INBYTES
    ldd  t2j, Y + 2 * ROW_INBYTES + 1
    ldi  ZL, pm_lo8(load_columns_table)
    ldi  ZH, pm_hi8(load_columns_table)
    ijmp
Sbox_one_column:
    Sbox x0j, x1j, x2j, x3j

    ;  f  e  d  c  b  a  9  8  7  6  5  4  3  2  1  0
    ; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x- 0
    ; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- x' 0
    ; -- -- -- -- -- -- -- -- -- -- -- -- -- x- -- -- 2
    ; -- -- -- -- -- -- -- -- -- -- -- -- x' -- -- -- 3
    ;  c  b  a  9  8  7  6  5  4  3  2  1  0  f  e  d
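    ; How the row rotations are realised: row 0 is stored back in place; row 1
    ; is rotated left by 1 bit as it is stored (carry chained through t1j);
    ; row 2 is rotated left by 16 bits by storing each byte two positions
    ; higher, wrapping near the end of the row; row 3's rotation by 25 bits is
    ; deferred to amend_shiftRow (a 1-bit rotate followed by a 3-byte move).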
    ; Store a byte to Row 0
    st   Y, x0j
    ; Store a byte combined with ShiftRow1 (row 1 is rotated left by 1 bit)
    lsl  t1j      ; move the previous column's MSB into the carry
    mov  t1j, x1j ; back up the current (S-boxed) byte; its MSB feeds the next column
    rol  x1j      ; rotate left by 1 bit, pulling in the bit from the previous column
    std  Y + ROW_INBYTES, x1j
    ; Store a byte combined with ShiftRow2 (row 2 is rotated left by 16 bits = 2 bytes)
    inc  ccnt
    cpi  ccnt, ROW_INBYTES - 1
    brsh ROW2_WRAP ; the last two columns wrap around to the start of row 2
    ldd  tmp0, Y + 2 * ROW_INBYTES + 2 ; save the byte about to be overwritten; it has not been through the S-box yet and re-enters the pipeline via t2j
    std  Y + 2 * ROW_INBYTES + 2, x2j
    mov  x2j, t2j
    mov  t2j, tmp0
    jmp  NO_ROW2_WRAP
ROW2_WRAP:
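    ; The destination byte (column + 2) wraps past the end of row 2; the bytes
    ; sitting at the wrapped positions were consumed in the first two columns,
    ; so nothing has to be saved before overwriting them.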
    std  Y + ROW_INBYTES + 2, x2j
    mov  x2j, t2j

    ; ShiftRow3 is deferred to 'amend_shiftRow'
NO_ROW2_WRAP:
    adiw YL, 1
    ld   x0j, Y
    ldd  x1j, Y + ROW_INBYTES

    adiw ZL, 1
    ijmp

amend_shiftRow:
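; amend_shiftRow completes the rotations the column loop left pending: the
; wrap-around bit of ShiftRow1 and the full ShiftRow3 rotation of row 3.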
    ldi YH, hi8(SRAM_STATE + ROW_INBYTES)
    ldi YL, lo8(SRAM_STATE + ROW_INBYTES)

    ld  x1j, Y
    bst t1j, 7
    bld x1j, 0
    st  Y,   x1j

    ; <<< 1 : rotate the 128-bit row 3 left by 1 bit
    mov  x3f, x3j ; x3j holds the S-boxed last byte of row 3; restore it to x3f
    rol  x3j      ; result discarded; only moves the MSB of x3f into the carry
    rol  x30
    rol  x31
    rol  x32
    rol  x33
    rol  x34
    rol  x35
    rol  x36
    rol  x37
    rol  x38
    rol  x39
    rol  x3a
    rol  x3b
    rol  x3c
    rol  x3d
    rol  x3e
    rol  x3f
    ; <<< 24
    ; f  e  d  c  b  a  9  8  7  6  5  4  3  2  1  0 =>
    ; c  b  a  9  8  7  6  5  4  3  2  1  0  f  e  d
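    ; Combined with the 1-bit rotate above, row 3 ends up rotated left by
    ; 25 bits in total.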
    mov  x3j, x30
    mov  x30, x3d
    mov  x3d, x3a
    mov  x3a, x37
    mov  x37, x34
    mov  x34, x31
    mov  x31, x3e
    mov  x3e, x3b
    mov  x3b, x38
    mov  x38, x35
    mov  x35, x32
    mov  x32, x3f
    mov  x3f, x3c
    mov  x3c, x39
    mov  x39, x36
    mov  x36, x33
    mov  x33, x3j

    dec rcnt
    breq round_loop_end
    rjmp round_loop_start

round_loop_end:

    ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES)
    ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES)
    st   Y+, x30
    st   Y+, x31
    st   Y+, x32
    st   Y+, x33
    st   Y+, x34
    st   Y+, x35
    st   Y+, x36
    st   Y+, x37
    st   Y+, x38
    st   Y+, x39
    st   Y+, x3a
    st   Y+, x3b
    st   Y+, x3c
    st   Y+, x3d
    st   Y+, x3e
    st   Y+, x3f

    POP_CONFLICT
    ret