Commit 7860b7c6 by 包珍珍 Committed by Enrico Pozzobon

new optimized implementations

parent 389d7d2c
/* Size constants (in bytes) exported for the AEAD interface.
 * NOTE(review): names follow the usual crypto_aead API convention; the
 * per-constant meanings below are presumed from that convention - confirm. */
#define CRYPTO_KEYBYTES 16 /* presumably the secret key length */
#define CRYPTO_NSECBYTES 0 /* presumably the secret message number length (none) */
#define CRYPTO_NPUBBYTES 16 /* presumably the public nonce length */
#define CRYPTO_ABYTES 16 /* presumably the ciphertext expansion (tag length) */
#define CRYPTO_NOOVERLAP 1 /* presumably: input/output buffers must not overlap - confirm */
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.0 2020 by KNOT Team *
; **********************************************
;
; LFSR6_MACRO: advance the round constant rc by one step of a 6-bit LFSR.
; The feedback bit rc[5] XOR rc[4] is shifted into bit 0 while rc shifts left
; by one; the result is then masked to 6 bits.
; Scratch: tmp0, tmp1 (only bit 0 of each is meaningful). Also changes the
; T flag and the arithmetic status flags.
.macro LFSR6_MACRO
bst rc, 5 ; T = rc[5]
bld tmp0, 0 ; tmp0[0] = rc[5]
bst rc, 4 ; T = rc[4]
bld tmp1, 0 ; tmp1[0] = rc[4]
eor tmp0, tmp1 ; tmp0[0] = rc[5] ^ rc[4] (feedback bit)
ror tmp0 ; move the feedback bit into the carry flag
rol rc ; rc = (rc << 1) | feedback
andi rc, 0x3F ; keep the low 6 bits
.endm
; LFSR7_MACRO: advance the round constant rc by one step of a 7-bit LFSR.
; The feedback bit rc[6] XOR rc[5] is shifted into bit 0 while rc shifts left
; by one; the result is then masked to 7 bits.
; Scratch: tmp0, tmp1 (only bit 0 of each is meaningful). Also changes the
; T flag and the arithmetic status flags.
.macro LFSR7_MACRO
bst rc, 6 ; T = rc[6]
bld tmp0, 0 ; tmp0[0] = rc[6]
bst rc, 5 ; T = rc[5]
bld tmp1, 0 ; tmp1[0] = rc[5]
eor tmp0, tmp1 ; tmp0[0] = rc[6] ^ rc[5] (feedback bit)
ror tmp0 ; move the feedback bit into the carry flag
rol rc ; rc = (rc << 1) | feedback
andi rc, 0x7F ; keep the low 7 bits
.endm
; LFSR8_MACRO: advance the round constant rc by one step of an 8-bit LFSR.
; The feedback bit rc[7] XOR rc[5] XOR rc[4] XOR rc[3] is shifted into bit 0
; while rc shifts left by one. No mask is needed because rc is 8 bits wide.
; Scratch: tmp0, tmp1 (only bit 0 of each is meaningful). Also changes the
; T flag and the arithmetic status flags.
.macro LFSR8_MACRO
bst rc, 7 ; T = rc[7]
bld tmp0, 0 ; tmp0[0] = rc[7]
bst rc, 5 ; T = rc[5]
bld tmp1, 0 ; tmp1[0] = rc[5]
eor tmp0, tmp1 ; tmp0[0] ^= rc[5]
bst rc, 4 ; T = rc[4]
bld tmp1, 0 ; tmp1[0] = rc[4]
eor tmp0, tmp1 ; tmp0[0] ^= rc[4]
bst rc, 3 ; T = rc[3]
bld tmp1, 0 ; tmp1[0] = rc[3]
eor tmp0, tmp1 ; tmp0[0] ^= rc[3] (feedback bit complete)
ror tmp0 ; move the feedback bit into the carry flag
rol rc ; rc = (rc << 1) | feedback
.endm
; Sbox i0, i1, i2, i3: bit-sliced substitution applied in place to four
; registers using only bitwise operations, so every bit position is
; transformed independently of the others.
; In/out: i0..i3 (all four are overwritten with the result).
; Scratch: tmp0. Status flags are changed.
; The instruction order matters: several steps read a register that an
; earlier step has already updated.
.macro Sbox i0, i1, i2, i3
mov tmp0, \i1 ; tmp0 = original i1
com \i0 ; i0 = ~i0
and \i1, \i0 ; i1 = i1 & ~i0(original)
eor \i1, \i2 ; i1 ^= i2
or \i2, tmp0 ; i2 |= original i1
eor \i0, \i3 ; i0 ^= i3
eor \i2, \i0 ; i2 ^= i0
eor tmp0, \i3 ; tmp0 = original i1 ^ original i3
and \i0, \i1 ; i0 &= i1
eor \i3, \i1 ; i3 ^= i1 (final i3)
eor \i0, tmp0 ; i0 ^= tmp0 (final i0)
and tmp0, \i2 ; tmp0 &= i2 (i2 is final here)
eor \i1, tmp0 ; i1 ^= tmp0 (final i1)
.endm
; PUSH_CONFLICT: save r16-r19, r23, r24 and r26-r31 on the stack.
; Must be paired with POP_CONFLICT, which pops in the reverse order.
; NOTE(review): presumably these are the registers a routine shares with its
; caller (it is used at the entry of Permutation) - confirm against the
; register alias definitions.
.macro PUSH_CONFLICT
push r16
push r17
push r18
push r19
push r23
push r24
push r26
push r27
push r28
push r29
push r30
push r31
.endm
; POP_CONFLICT: restore the registers saved by PUSH_CONFLICT.
; The pop order is the exact reverse of the push order in PUSH_CONFLICT, so
; the two macros must be kept in sync.
.macro POP_CONFLICT
pop r31
pop r30
pop r29
pop r28
pop r27
pop r26
pop r24
pop r23
pop r19
pop r18
pop r17
pop r16
.endm
; PUSH_ALL: save r2-r17, r28 and r29 on the stack.
; This is the set of call-saved registers in the avr-gcc calling convention,
; so the macro is suitable for the prologue of a routine called from C.
; Must be paired with POP_ALL, which pops in the reverse order.
.macro PUSH_ALL
push r2
push r3
push r4
push r5
push r6
push r7
push r8
push r9
push r10
push r11
push r12
push r13
push r14
push r15
push r16
push r17
push r28
push r29
.endm
; POP_ALL: restore the registers saved by PUSH_ALL (reverse order) and then
; clear r1.
; avr-gcc compiled code expects r1 to hold zero, so it is reset here before
; control returns to C.
.macro POP_ALL
pop r29
pop r28
pop r17
pop r16
pop r15
pop r14
pop r13
pop r12
pop r11
pop r10
pop r9
pop r8
pop r7
pop r6
pop r5
pop r4
pop r3
pop r2
clr r1 ; restore the zero-register convention
.endm
\ No newline at end of file
#ifndef __CONFIG_H__
#define __CONFIG_H__

/*
 * Build-time configuration: selects the primitive (AEAD and/or hash), the
 * state size and the key/digest size, and derives every other size, shift
 * and round-count constant from those choices.
 */

/* Primitive selection: define CRYPTO_AEAD and/or CRYPTO_HASH. */
#define CRYPTO_AEAD
//#define CRYPTO_HASH

#define MAX_MESSAGE_LENGTH 128

/* State size in bits; 256, 384 and 512 are the values handled below. */
#define STATE_INBITS 256
/* For CRYPTO_AEAD */
#define CRYPTO_KEYBITS 128
/* For CRYPTO_HASH */
#define CRYPTO_BITS 256

/* The state is split into 4 rows; sizes are rounded up to whole bytes. */
#define STATE_INBYTES ((STATE_INBITS + 7) / 8)
#define ROW_INBITS ((STATE_INBITS + 3) / 4)
#define ROW_INBYTES ((ROW_INBITS + 7) / 8)

/* For CRYPTO_AEAD */
#define CRYPTO_KEYBYTES ((CRYPTO_KEYBITS + 7) / 8)
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES CRYPTO_KEYBYTES
#define CRYPTO_ABYTES CRYPTO_KEYBYTES
#define CRYPTO_NOOVERLAP 1

#define MAX_ASSOCIATED_DATA_LENGTH 32
#define MAX_CIPHER_LENGTH (MAX_MESSAGE_LENGTH + CRYPTO_ABYTES)

/*
 * Status codes. Each is defined once, and the negative values are
 * parenthesized so they expand safely inside any expression.
 */
#define TAG_MATCH 0
#define TAG_UNMATCH (-1)
#define OTHER_FAILURES (-2)

/* For CRYPTO_HASH */
#define CRYPTO_BYTES ((CRYPTO_BITS + 7) / 8)

#define DOMAIN_BITS 0x80
#define PAD_BITS 0x01
#define S384_R192_BITS 0x80

/* Per-state-size constants C1, C2, C3. */
#if (STATE_INBITS==256)
#define C1 1
#define C2 8
#define C3 25
#elif (STATE_INBITS==384)
#define C1 1
#define C2 8
#define C3 55
#elif (STATE_INBITS==512)
#define C1 1
#define C2 16
#define C3 25
#else
#error "Not specified state size"
#endif

#ifdef CRYPTO_AEAD
/* For CRYPTO_AEAD */
#define KEY_INBITS (CRYPTO_KEYBYTES * 8)
#define KEY_INBYTES (CRYPTO_KEYBYTES)

#define NONCE_INBITS (CRYPTO_NPUBBYTES * 8)
#define NONCE_INBYTES (CRYPTO_NPUBBYTES)

#define TAG_INBITS (CRYPTO_ABYTES * 8)
#define TAG_INBYTES (CRYPTO_ABYTES)

/* Rate and round counts (NR_0, NR_i, NR_f) per key/state combination. */
#if (KEY_INBITS==128) && (STATE_INBITS==256)
#define RATE_INBITS 64
#define NR_0 52
#define NR_i 28
#define NR_f 32
#elif (KEY_INBITS==128) && (STATE_INBITS==384)
#define RATE_INBITS 192
#define NR_0 76
#define NR_i 28
#define NR_f 32
#elif (KEY_INBITS==192) && (STATE_INBITS==384)
#define RATE_INBITS 96
#define NR_0 76
#define NR_i 40
#define NR_f 44
#elif (KEY_INBITS==256) && (STATE_INBITS==512)
#define RATE_INBITS 128
#define NR_0 100
#define NR_i 52
#define NR_f 56
#else
#error "Not specified key size and state size"
#endif

#define RATE_INBYTES ((RATE_INBITS + 7) / 8)
#define SQUEEZE_RATE_INBYTES TAG_INBYTES
#endif

#ifdef CRYPTO_HASH
/* For CRYPTO_HASH */
#define HASH_DIGEST_INBITS (CRYPTO_BYTES * 8)

/* Absorb/squeeze rates and round count per digest/state combination. */
#if (HASH_DIGEST_INBITS==256) && (STATE_INBITS==256)
#define HASH_RATE_INBITS 32
#define HASH_SQUEEZE_RATE_INBITS 128
#define NR_h 68
#elif (HASH_DIGEST_INBITS==256) && (STATE_INBITS==384)
#define HASH_RATE_INBITS 128
#define HASH_SQUEEZE_RATE_INBITS 128
#define NR_h 80
#elif (HASH_DIGEST_INBITS==384) && (STATE_INBITS==384)
#define HASH_RATE_INBITS 48
#define HASH_SQUEEZE_RATE_INBITS 192
#define NR_h 104
#elif (HASH_DIGEST_INBITS==512) && (STATE_INBITS==512)
#define HASH_RATE_INBITS 64
#define HASH_SQUEEZE_RATE_INBITS 256
#define NR_h 140
#else
#error "Not specified hash digest size and state size"
#endif

#define HASH_RATE_INBYTES ((HASH_RATE_INBITS + 7) / 8)
#define HASH_SQUEEZE_RATE_INBYTES ((HASH_SQUEEZE_RATE_INBITS + 7) / 8)
#endif

#endif
\ No newline at end of file
/* Public AEAD interface; wrapped in extern "C" so C++ callers link against
 * the unmangled C symbols. */
#ifdef __cplusplus
extern "C" {
#endif
/* Encrypt: c receives the ciphertext and *clen its length; m/mlen is the
 * plaintext, ad/adlen the associated data, nsec the secret message number,
 * npub the public nonce and k the key. */
int crypto_aead_encrypt(
unsigned char *c,unsigned long long *clen,
const unsigned char *m,unsigned long long mlen,
const unsigned char *ad,unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
);
/* Decrypt: m receives the plaintext and *outputmlen its length; c/clen is
 * the ciphertext, ad/adlen the associated data, nsec the secret message
 * number output, npub the public nonce and k the key. */
int crypto_aead_decrypt(
unsigned char *m,unsigned long long *outputmlen,
unsigned char *nsec,
const unsigned char *c,unsigned long long clen,
const unsigned char *ad,unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
);
#ifdef __cplusplus
}
#endif
#include <avr/io.h>
#include <avr/sfr_defs.h>
#include <stdlib.h>
#include <string.h>
#include "config.h"
/* Entry points implemented outside this file (presumably in the assembly
 * sources). Note that every length parameter is an 8-bit unsigned char. */

/* Encryption core: no return value. */
extern void crypto_aead_encrypt_asm(
unsigned char *c,
const unsigned char *m,
unsigned char mlen,
const unsigned char *ad,
unsigned char adlen,
const unsigned char *npub,
const unsigned char *k
);
/* Decryption core: the caller in this file treats a non-zero return value
 * as a tag verification failure.
 * NOTE(review): the third parameter is named clen, but the caller in this
 * file passes the message length (ciphertext length minus the tag) - confirm
 * against the assembly. */
extern int crypto_aead_decrypt_asm(
unsigned char *m,
const unsigned char *c,
unsigned char clen,
const unsigned char *ad,
unsigned char adlen,
const unsigned char *npub,
const unsigned char *k
);
/* Hash core: not called from the code visible in this file. */
extern void crypto_hash_asm(
unsigned char *out,
const unsigned char *in,
unsigned char inlen
);
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
)
{
/*
...
... the code for the cipher implementation goes here,
... generating a ciphertext c[0],c[1],...,c[*clen-1]
... from a plaintext m[0],m[1],...,m[mlen-1]
... and associated data ad[0],ad[1],...,ad[adlen-1]
... and nonce npub[0],npub[1],..
... and secret key k[0],k[1],...
... the implementation shall not use nsec
...
... return 0;
*/
(void)nsec;
crypto_aead_encrypt_asm(c, m, mlen, ad, adlen, npub, k);
*clen = mlen + TAG_INBYTES;
return 0;
}
int crypto_aead_decrypt(
unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
)
{
/*
...
... the code for the AEAD implementation goes here,
... generating a plaintext m[0],m[1],...,m[*mlen-1]
... and secret message number nsec[0],nsec[1],...
... from a ciphertext c[0],c[1],...,c[clen-1]
... and associated data ad[0],ad[1],...,ad[adlen-1]
... and nonce number npub[0],npub[1],...
... and secret key k[0],k[1],...
...
... return 0;
*/
unsigned long long mlen_;
unsigned char tag_is_match;
(void)nsec;
if (clen < CRYPTO_ABYTES) {
return -1;
}
mlen_ = clen - CRYPTO_ABYTES;
tag_is_match = crypto_aead_decrypt_asm(m, c, mlen_, ad, adlen, npub, k);
if (tag_is_match != 0)
{
memset(m, 0, (size_t)mlen_);
return -1;
}
*mlen = mlen_;
return 0;
}
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.0 2020 by KNOT Team *
; **********************************************
;
#include "assist.h"
;
; Permutation
;   Applies rn rounds of the permutation to the state stored at SRAM_STATE.
;   This variant handles rows of 8 bytes (eight column registers x30..x37).
;
;   In:    rn = number of rounds. Must be non-zero: the round counter is
;          decremented and tested only after a round has been executed.
;          AEDH is read only when both CRYPTO_AEAD and CRYPTO_HASH are
;          defined, to choose the round-constant LFSR.
;   State: rows 0..2 are updated in SRAM, ROW_INBYTES bytes per row at
;          SRAM_STATE + row * ROW_INBYTES. Row 3 is kept in x30..x37 for the
;          whole call and written back before returning.
;   Saves: the registers covered by PUSH_CONFLICT / POP_CONFLICT.
;
;   Each round is: XOR the round constant into byte 0 of row 0, step the
;   LFSR, run the S-box on every byte column, and rotate rows 1, 2 and 3.
;   The row rotations are folded into the stores of the S-box outputs; the
;   parts that cannot be done per column are finished in amend_shiftRow.
;
Permutation:
    PUSH_CONFLICT
    mov rcnt, rn                            ; rcnt = rounds still to run
    ldi rc, 0x01                            ; initial round constant

    ; Load row 3 into its eight column registers.
    ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES)
    ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES)
    ld x30, Y+
    ld x31, Y+
    ld x32, Y+
    ld x33, Y+
    ld x34, Y+
    ld x35, Y+
    ld x36, Y+
    ld x37, Y+

round_loop_start:
    rjmp AddRC_SubColumns_Start

    ; Dispatch table indexed through Z. Z is set to the first entry at the
    ; start of a round and advanced by one entry after every column, so the
    ; ninth dispatch lands on amend_shiftRow.
load_columns_table:
    rjmp load_column0
    rjmp load_column1
    rjmp load_column2
    rjmp load_column3
    rjmp load_column4
    rjmp load_column5
    rjmp load_column6
    rjmp load_column7
    rjmp amend_shiftRow

    ; load_columnN: write the row 3 S-box output of the previous column back
    ; to its register, then fetch the row 3 byte of column N into x3j.
load_column0:
    mov x3j, x30
    rjmp Sbox_one_column
load_column1:
    mov x30, x3j
    mov x3j, x31
    rjmp Sbox_one_column
load_column2:
    mov x31, x3j
    mov x3j, x32
    rjmp Sbox_one_column
load_column3:
    mov x32, x3j
    mov x3j, x33
    rjmp Sbox_one_column
load_column4:
    mov x33, x3j
    mov x3j, x34
    rjmp Sbox_one_column
load_column5:
    mov x34, x3j
    mov x3j, x35
    rjmp Sbox_one_column
load_column6:
    mov x35, x3j
    mov x3j, x36
    rjmp Sbox_one_column
load_column7:
    mov x36, x3j
    mov x3j, x37
    rjmp Sbox_one_column

#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
    ; Run-time choice of the round-constant LFSR when both modes are built.
LFSR_table:
    rjmp LFSR6
    rjmp LFSR7
LFSR6:
    LFSR6_MACRO
    rjmp LFSR_DONE
LFSR7:
    LFSR7_MACRO
    rjmp LFSR_DONE
#endif

;;;;;;;;;;;;;;;;;;;;;;;; Real Start
AddRC_SubColumns_Start:
    ldi YH, hi8(SRAM_STATE)
    ldi YL, lo8(SRAM_STATE)
    clr ccnt                                ; column counter = 0
    ld x0j, Y
    eor x0j, rc                             ; add the round constant to byte 0 of row 0
#if defined(CRYPTO_AEAD) && defined(CRYPTO_HASH)
    ldi ZL, pm_lo8(LFSR_table)
    ldi ZH, pm_hi8(LFSR_table)
    sbrc AEDH, 2                            ; AEDH[2] = 0 selects LFSR6 (AEAD), AEDH[2] = 1 selects LFSR7 (HASH)
    adiw ZL, 1
    ijmp
LFSR_DONE:
#elif defined(CRYPTO_AEAD)
    LFSR6_MACRO                             ; only AEAD
#else
    LFSR7_MACRO                             ; only HASH
#endif
    ldd x1j, Y + ROW_INBYTES
    ldd x2j, Y + 2 * ROW_INBYTES
    ldi ZL, pm_lo8(load_columns_table)
    ldi ZH, pm_hi8(load_columns_table)
    ijmp                                    ; enter at load_column0

Sbox_one_column:
    Sbox x0j, x1j, x2j, x3j

    ; 7 6 5 4 3 2 1 0
    ; -- -- -- -- -- -- -- x- 0
    ; -- -- -- -- -- -- -- x' 0
    ; -- -- -- -- -- -- x- -- 1
    ; -- -- -- -- x' -- -- -- 3
    ; 4 3 2 1 0 7 6 5

    ; Row 0 is stored unchanged.
    st Y, x0j

    ; Row 1 is rotated left by one bit across the whole row. The top bit of
    ; the previous column (saved in t1j) is moved into carry and rotated into
    ; this byte. For column 0 t1j is stale, so bit 0 of that byte is fixed
    ; afterwards in amend_shiftRow.
    lsl t1j
    mov t1j, x1j                            ; keep this byte so its top bit can feed the next column
    rol x1j
    std Y + ROW_INBYTES, x1j

    ; Row 2 is rotated by one byte: the output of this column is stored in
    ; the next byte position, and the last column wraps to byte 0.
    inc ccnt
    cpi ccnt, ROW_INBYTES
    breq ROW2_WRAP
    ldd t2j, Y + 2 * ROW_INBYTES + 1        ; fetch the next column input before it is overwritten
    std Y + 2 * ROW_INBYTES + 1, x2j
    mov x2j, t2j
    rjmp NO_ROW2_WRAP                       ; short relative jump, consistent with the rest of the routine
ROW2_WRAP:
    std Y + ROW_INBYTES + 1, x2j            ; Y is at the last column here, so this is byte 0 of row 2
    ; the row 3 rotation is done in amend_shiftRow
NO_ROW2_WRAP:
    adiw YL, 1                              ; next column
    ld x0j, Y
    ldd x1j, Y + ROW_INBYTES

    adiw ZL, 1                              ; next dispatch table entry
    ijmp

amend_shiftRow:
    ; Finish the row 1 rotation: bit 7 of the last column wraps into bit 0
    ; of byte 0.
    ldi YH, hi8(SRAM_STATE + ROW_INBYTES)
    ldi YL, lo8(SRAM_STATE + ROW_INBYTES)
    ld x1j, Y
    bst t1j, 7
    bld x1j, 0
    st Y, x1j

    ; Row 3, step 1: rotate left by one bit across x30..x37.
    mov x37, x3j                            ; write back the S-box output of the last column
    rol x3j                                 ; only used to move bit 7 of x37 into carry
    rol x30
    rol x31
    rol x32
    rol x33
    rol x34
    rol x35
    rol x36
    rol x37

    ; Row 3, step 2: rotate by three bytes (24 bits) by permuting registers.
    ; 7 6 5 4 3 2 1 0 => 4 3 2 1 0 7 6 5
    mov x3j, x30
    mov x30, x35
    mov x35, x32
    mov x32, x37
    mov x37, x34
    mov x34, x31
    mov x31, x36
    mov x36, x33
    mov x33, x3j

    dec rcnt
    breq round_loop_end
    rjmp round_loop_start

round_loop_end:
    ; Write row 3 back to SRAM.
    ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES)
    ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES)
    st Y+, x30
    st Y+, x31
    st Y+, x32
    st Y+, x33
    st Y+, x34
    st Y+, x35
    st Y+, x36
    st Y+, x37
    POP_CONFLICT
    ret
\ No newline at end of file
;
; **********************************************
; * KNOT: a family of bit-slice lightweight *
; * authenticated encryption algorithms *
; * and hash functions *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.0 2020 by KNOT Team *
; **********************************************
;
#include "assist.h"
Permutation:
PUSH_CONFLICT
mov rcnt, rn
ldi rc, 0x01
ldi YH, hi8(SRAM_STATE + 3 * ROW_INBYTES)
ldi YL, lo8(SRAM_STATE + 3 * ROW_INBYTES)
ld x30, Y+
ld x31, Y+
ld x32, Y+
ld x33, Y+
ld x34, Y+
ld x35, Y+
ld x36, Y+
ld x37, Y+
ld x38, Y+
ld x39, Y+
ld x3a, Y+
ld x3b, Y+
round_loop_start:
rjmp AddRC_SubColumns_Start
load_columns_table:
rjmp load_column0
rjmp load_column1
rjmp load_column2
rjmp load_column3
rjmp load_column4
rjmp load_column5
rjmp load_column6
rjmp load_column7
rjmp load_column8
rjmp load_column9
rjmp load_columna
rjmp load_columnb
rjmp amend_shiftRow
load_column0:
mov x3j, x30
rjmp Sbox_one_column
load_column1:
mov x30, x3j
mov x3j, x31
rjmp Sbox_one_column
load_column2:
mov x31, x3j
mov x3j, x32
rjmp Sbox_one_column
load_column3:
mov x32, x3j
mov x3j, x33
rjmp Sbox_one_column
load_column4:
mov x33, x3j
mov x3j, x34
rjmp Sbox_one_column
load_column5:
mov x34, x3j
mov x3j, x35
rjmp Sbox_one_column
load_column6:
mov x35, x3j
mov x3j, x36
rjmp Sbox_one_column
load_column7:
mov x36, x3j
mov x3j, x37
rjmp Sbox_one_column
load_column8:
mov x37, x3j
mov x3j, x38
rjmp Sbox_one_column
load_column9:
mov x38, x3j
mov x3j, x39
rjmp Sbox_one_column
load_columna:
mov x39, x3j
mov x3j, x3a
rjmp Sbox_one_column
load_columnb:
mov x3a, x3j
mov x3j, x3b
rjmp Sbox_one_column