; knot256.h
;
; **********************************************
; * KNOT: a family of bit-slice lightweight    *
; *       authenticated encryption algorithms  *
; *       and hash functions                   *
; *                                            *
; * Assembly implementation for 8-bit AVR CPU  *
; * Version 1.0 2020 by KNOT Team              *
; **********************************************
;
#include "assist.h"

; Permutation: entry point of the round function.
;   rn   - number of rounds requested by the caller (copied to rcnt below)
;   The state lives in SRAM at SRAM_STATE as rows of ROW_INBYTES bytes.
;   PUSH_CONFLICT / POP_CONFLICT are macros defined outside this file
;   (presumably they save/restore registers this routine reuses - confirm
;   in assist.h).
Permutation:
    PUSH_CONFLICT
    mov  rcnt, rn                               ; rcnt = working round counter

    ; Point Y at row 3 and seed the round constant.
    ldi  YL, lo8(SRAM_STATE + 3 * ROW_INBYTES)
    ldi  YH, hi8(SRAM_STATE + 3 * ROW_INBYTES)
    ldi  rc, 0x01                               ; initial round-constant value

    ; Row 3 is held in registers x30..x37 for the whole permutation and is
    ; only written back to SRAM at round_loop_end.
    ld   x30, Y+
    ld   x31, Y+
    ld   x32, Y+
    ld   x33, Y+
    ld   x34, Y+
    ld   x35, Y+
    ld   x36, Y+
    ld   x37, Y+

; Top of the per-round loop. The dispatch tables and column loaders are laid
; out directly below, so each round begins by jumping over them to the first
; real instruction of the round.
round_loop_start:
    rjmp AddRC_SubColumns_Start

; Jump table walked through Z with ijmp. AddRC_SubColumns_Start loads Z with
; the program-memory address of this table, and Sbox_one_column advances Z by
; one (adiw ZL, 1) after every column, so the entries must stay in this order
; and each must remain a single rjmp. The ninth entry ends the column loop by
; dispatching to amend_shiftRow.
load_columns_table:
    rjmp load_column0
    rjmp load_column1
    rjmp load_column2
    rjmp load_column3
    rjmp load_column4
    rjmp load_column5
    rjmp load_column6
    rjmp load_column7
    rjmp amend_shiftRow

; Column loaders for row 3, which is kept in registers x30..x37.
; x3j is the working register handed to the Sbox macro. Each loader first
; writes the previous column's processed x3j back into that column's register
; and then copies the next column's byte into x3j. load_column0 has nothing to
; write back; the result of column 7 is written back at amend_shiftRow.
load_column0:
    mov  x3j, x30
    rjmp Sbox_one_column
load_column1:
    mov  x30, x3j       ; store processed column 0
    mov  x3j, x31       ; fetch column 1
    rjmp Sbox_one_column
load_column2:
    mov  x31, x3j       ; store processed column 1
    mov  x3j, x32       ; fetch column 2
    rjmp Sbox_one_column
load_column3:
    mov  x32, x3j       ; store processed column 2
    mov  x3j, x33       ; fetch column 3
    rjmp Sbox_one_column
load_column4:
    mov  x33, x3j       ; store processed column 3
    mov  x3j, x34       ; fetch column 4
    rjmp Sbox_one_column
load_column5:
    mov  x34, x3j       ; store processed column 4
    mov  x3j, x35       ; fetch column 5
    rjmp Sbox_one_column
load_column6:
    mov  x35, x3j       ; store processed column 5
    mov  x3j, x36       ; fetch column 6
    rjmp Sbox_one_column
load_column7:
    mov  x36, x3j       ; store processed column 6
    mov  x3j, x37       ; fetch column 7
    rjmp Sbox_one_column

; Only built when both AEAD and HASH are compiled in: the round-constant
; update is then selected at run time through this two-entry jump table.
; AddRC_SubColumns_Start points Z at LFSR_table, optionally adds 1 to pick the
; second entry, and does an ijmp. Both variants return via LFSR_DONE.
; LFSR6_MACRO / LFSR7_MACRO are defined outside this file.
#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
LFSR_table:
    rjmp LFSR6          ; entry 0
    rjmp LFSR7          ; entry 1
LFSR6:
    LFSR6_MACRO
    rjmp LFSR_DONE
LFSR7:
    LFSR7_MACRO
    rjmp LFSR_DONE
#endif

;;;;;;;;;;;;;;;;;;;;;;;; Real Start
; Start of one round:
;   1. point Y at the first byte of row 0 and clear the column counter,
;   2. XOR the round constant rc into the first byte of row 0,
;   3. run the LFSR macro (it is defined outside this file; presumably it
;      advances rc for the next round - confirm in assist.h),
;   4. preload the row 1 / row 2 bytes of column 0 and enter the column loop
;      through load_columns_table.
AddRC_SubColumns_Start:
    ldi  YH, hi8(SRAM_STATE)
    ldi  YL, lo8(SRAM_STATE)
    clr  ccnt                   ; ccnt = number of columns processed so far
    ld   x0j, Y
    eor  x0j, rc                ; AddRoundConstant on the first byte of row 0

#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
    ; Both modes built in: choose the LFSR at run time via LFSR_table.
    ldi  ZL, pm_lo8(LFSR_table)
    ldi  ZH, pm_hi8(LFSR_table)
    sbrc AEDH,  2 ; AEDH[2] = 0 for AEAD and AEDH[2] = 1 for HASH
    adiw ZL, 1    ; executed only when AEDH[2] is set: select the LFSR7 entry
    ijmp
LFSR_DONE:
#elif defined(CRYPTO_AEAD)
    LFSR6_MACRO ; only AEAD
#else
    LFSR7_MACRO ; only HASH
#endif

    ldd  x1j, Y + ROW_INBYTES           ; column 0 byte of row 1
    ldd  x2j, Y + 2 * ROW_INBYTES       ; column 0 byte of row 2
    ; Z walks load_columns_table for the rest of the round; the first entry
    ; is load_column0.
    ldi  ZL, pm_lo8(load_columns_table)
    ldi  ZH, pm_hi8(load_columns_table)
    ijmp
; Process one byte column of the state.
;   In:  x0j, x1j, x2j, x3j = the column's bytes of rows 0..3
;        Y   -> the column's byte in row 0
;        Z   -> the load_columns_table entry that dispatched this column
;        t1j =  row-1 byte of the previous column before it was shifted
;               (stale on the first column of a round; see below)
;   Out: rows 0..2 written to SRAM with ShiftRow1/ShiftRow2 folded into the
;        stores, x3j left for the next column loader to write back,
;        Y and Z advanced by one, ccnt incremented.
Sbox_one_column:
    Sbox x0j, x1j, x2j, x3j

    ;  7  6  5  4  3  2  1  0
    ; -- -- -- -- -- -- -- x- 0
    ; -- -- -- -- -- -- -- x' 0
    ; -- -- -- -- -- -- x- -- 1
    ; -- -- -- -- x' -- -- -- 3
    ;  4  3  2  1  0  7  6  5
    ; Store a byte to Row 0
    st   Y, x0j
    ; Store a byte combined with ShiftRow1
    ; The three instructions below must stay in this order: lsl moves bit 7 of
    ; the previous column's byte into carry, mov (which leaves the flags
    ; untouched) saves the current byte for the next column, and rol shifts
    ; the current byte left while pulling that carry into bit 0.
    ; On the first column t1j is stale, so bit 0 of the stored byte is wrong
    ; until amend_shiftRow patches it with bit 7 of the last column.
    lsl  t1j
    mov  t1j, x1j ; back up the last updated byte in t1j, to be used in shiftRow1 (1 bit left)
    rol  x1j
    std  Y + ROW_INBYTES, x1j
    ; Store a byte combined with ShiftRow2
    ; Row 2 moves by one whole byte: the updated byte is stored one position
    ; further along, and the byte that was there is picked up first.
    inc  ccnt
    cpi  ccnt, ROW_INBYTES
    breq ROW2_WRAP
    ldd  t2j, Y + 2 * ROW_INBYTES + 1 ; load next byte, the last updated byte needed to be shifted to the address of the next bytes
    std  Y + 2 * ROW_INBYTES + 1, x2j
    mov  x2j, t2j
    ; rjmp instead of jmp: the target is only a few words away, rjmp is one
    ; word shorter, matches every other jump in this file, and jmp is not
    ; implemented on every AVR device.
    rjmp NO_ROW2_WRAP
ROW2_WRAP:
    ; Last column: Y is at the final byte of row 0, so this offset addresses
    ; the first byte of row 2, i.e. the byte wraps around.
    std  Y + ROW_INBYTES + 1, x2j
    ; remain ShiftRow3 to be done at 'amend_shiftRow'
NO_ROW2_WRAP:
    adiw YL, 1                  ; next column
    ld   x0j, Y
    ldd  x1j, Y + ROW_INBYTES

    adiw ZL, 1                  ; next load_columns_table entry
    ijmp

; Reached through the last load_columns_table entry once every column has been
; processed. Finishes the row shifts that could not be completed inside the
; column loop, then closes the round.
amend_shiftRow:
    ldi YH, hi8(SRAM_STATE + ROW_INBYTES)
    ldi YL, lo8(SRAM_STATE + ROW_INBYTES)

    ; Row 1 wrap-around: t1j still holds the last column's row-1 byte from
    ; before it was shifted; its bit 7 becomes bit 0 of the first row-1 byte.
    ld  x1j, Y
    bst t1j, 7
    bld x1j, 0
    st  Y,   x1j

    ; Row 3 (in registers): write back the last column, then rotate.
    ; <<< 1
    mov  x37, x3j       ; store processed column 7
    rol  x3j            ; only used to move bit 7 of the last byte into carry
    rol  x30            ; carry ripples through all eight bytes
    rol  x31
    rol  x32
    rol  x33
    rol  x34
    rol  x35
    rol  x36
    rol  x37
    ; <<< 24
    ; 7 6 5 4 3 2 1 0 => 4 3 2 1 0 7 6 5
    ; Byte permutation done as one cycle of register moves, with x3j as the
    ; temporary that carries the old x30 to x33.
    mov  x3j, x30
    mov  x30, x35
    mov  x35, x32
    mov  x32, x37
    mov  x37, x34
    mov  x34, x31
    mov  x31, x36
    mov  x36, x33
    mov  x33, x3j

    ; Loop while rounds remain. breq + rjmp is used for the backward jump;
    ; presumably round_loop_start is out of range for a conditional branch
    ; (the size of the Sbox and LFSR macros is not visible here).
    dec rcnt
    breq round_loop_end
    rjmp round_loop_start

; All rounds done: rows 0..2 were updated in SRAM as the rounds ran, so only
; row 3, which lived in x30..x37, still has to be written back.
round_loop_end:
    ldi  YL, lo8(SRAM_STATE + 3 * ROW_INBYTES)
    ldi  YH, hi8(SRAM_STATE + 3 * ROW_INBYTES)
    st   Y+, x30
    st   Y+, x31
    st   Y+, x32
    st   Y+, x33
    st   Y+, x34
    st   Y+, x35
    st   Y+, x36
    st   Y+, x37

    ; Counterpart of the PUSH_CONFLICT at the top of Permutation.
    POP_CONFLICT
    ret