Commit 4f2227ae by 包珍珍, committed by Enrico Pozzobon

avr8_lowrom for photon-beetle

parent 7860b7c6
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
;
; **********************************************
; *               PHOTON-Beetle                *
; *  Authenticated Encryption and Hash Family  *
; *                                            *
; * Assembly implementation for 8-bit AVR CPU  *
; *   Version 1.0 2020 by PHOTON-Beetle Team   *
; **********************************************
;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Bitslice
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
.MACRO Reorder_8_bits i0, i1, i2, i3, i4
ror \i0
ror \i1
ror \i0
ror \i2
ror \i0
ror \i3
ror \i0
ror \i4
ror \i0
ror \i1
ror \i0
ror \i2
ror \i0
ror \i3
ror \i0
ror \i4
.ENDM
.MACRO InvReorder_8_bits i0, i1, i2, i3, i4
ror \i1
ror \i0
ror \i2
ror \i0
ror \i3
ror \i0
ror \i4
ror \i0
ror \i1
ror \i0
ror \i2
ror \i0
ror \i3
ror \i0
ror \i4
ror \i0
.ENDM
; require XH:XL be the address of the input
Load_Reorder_32_bits:
ldi cnt1, 4
reorder_8_bits_loop:
ld rmp, X+
Reorder_8_bits rmp, x0, x1, x2, x3
dec cnt1
brne reorder_8_bits_loop
ret
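; Reference model (an illustrative C sketch, not part of the build):
; the reordering bitslices each 32-bit word nibble-wise, so that bit k
; of every 4-bit cell ends up collected in register xk.
;
;   static void load_reorder_32(const uint8_t in[4], uint8_t x[4])
;   {
;       for (int i = 0; i < 4; i++) {           /* four input bytes  */
;           uint8_t b = in[i];
;           for (int n = 0; n < 2; n++)         /* two nibbles/byte  */
;               for (int j = 0; j < 4; j++) {   /* one bit per plane */
;                   x[j] = (uint8_t)((x[j] >> 1) | ((b & 1u) << 7));
;                   b >>= 1;
;               }
;       }
;   }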
; require YH:YL be the address of the output
invReorder_Store_32_bits:
ldi cnt1, 4
invreorder_8_bits_loop:
InvReorder_8_bits rmp, x0, x1, x2, x3
st Y+, rmp
dec cnt1
brne invreorder_8_bits_loop
ret
; require XH:XL be the address of the input
; require YH:YL be the address of the output
Load_Reorder_Store_128_bits:
ldi cnt0, 4
reorder_32_bits_loop:
rcall Load_Reorder_32_bits
st Y+, x0
st Y+, x1
st Y+, x2
st Y+, x3
dec cnt0
brne reorder_32_bits_loop
ret
; require XH:XL be the address of the input
; require YH:YL be the address of the output
Load_invReorder_Store_128_bits:
ldi cnt0, 4
invreorder_32_bits_loop:
ld x0, X+
ld x1, X+
ld x2, X+
ld x3, X+
rcall invReorder_Store_32_bits
dec cnt0
brne invreorder_32_bits_loop
ret
.macro PUSH_ALL
push r2
push r3
push r4
push r5
push r6
push r7
push r8
push r9
push r10
push r11
push r12
push r13
push r14
push r15
push r16
push r17
push r28
push r29
.endm
.macro POP_ALL
pop r29
pop r28
pop r17
pop r16
pop r15
pop r14
pop r13
pop r12
pop r11
pop r10
pop r9
pop r8
pop r7
pop r6
pop r5
pop r4
pop r3
pop r2
clr r1
.endm
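; PUSH_ALL/POP_ALL cover exactly the registers the avr-gcc ABI treats
; as call-saved (r2-r17 and Y = r28:r29); POP_ALL also clears r1, which
; compiled code expects to hold zero, because this code reuses r1 as ed.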
#ifdef __cplusplus
extern "C" {
#endif
int crypto_aead_encrypt(
unsigned char *c,unsigned long long *clen,
const unsigned char *m,unsigned long long mlen,
const unsigned char *ad,unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
);
int crypto_aead_decrypt(
unsigned char *m,unsigned long long *outputmlen,
unsigned char *nsec,
const unsigned char *c,unsigned long long clen,
const unsigned char *ad,unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
);
#ifdef __cplusplus
}
#endif
#include <avr/io.h>
#include <avr/sfr_defs.h>
#include <stdlib.h>
#include <string.h>
#include "api.h"
extern void crypto_aead_encrypt_asm(
unsigned char *c,
const unsigned char *m,
unsigned char mlen,
const unsigned char *ad,
unsigned char adlen,
const unsigned char *npub,
const unsigned char *k
);
extern char crypto_aead_decrypt_asm(
unsigned char *m,
const unsigned char *c,
unsigned char clen,
const unsigned char *ad,
unsigned char adlen,
const unsigned char *npub,
const unsigned char *k
);
extern void crypto_hash_asm(
unsigned char *out,
const unsigned char *in,
unsigned char inlen
);
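/*
 * Note: the assembly entry points above take 8-bit length parameters
 * (unsigned char), so only the least significant byte of the 64-bit
 * C-level lengths reaches them; these wrappers therefore support
 * message and associated-data lengths of at most 255 bytes.
 */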
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
)
{
/*
...
... the code for the cipher implementation goes here,
... generating a ciphertext c[0],c[1],...,c[*clen-1]
... from a plaintext m[0],m[1],...,m[mlen-1]
... and associated data ad[0],ad[1],...,ad[adlen-1]
... and nonce npub[0],npub[1],..
... and secret key k[0],k[1],...
... the implementation shall not use nsec
...
... return 0;
*/
(void)nsec;
crypto_aead_encrypt_asm(c, m, mlen, ad, adlen, npub, k);
*clen = mlen + CRYPTO_ABYTES;
return 0;
}
int crypto_aead_decrypt(
unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
)
{
/*
...
... the code for the AEAD implementation goes here,
... generating a plaintext m[0],m[1],...,m[*mlen-1]
... and secret message number nsec[0],nsec[1],...
... from a ciphertext c[0],c[1],...,c[clen-1]
... and associated data ad[0],ad[1],...,ad[adlen-1]
... and nonce number npub[0],npub[1],...
... and secret key k[0],k[1],...
...
... return 0;
*/
unsigned long long mlen_;
char tag_is_match;
(void)nsec;
if (clen < CRYPTO_ABYTES) {
return -1;
}
mlen_ = clen - CRYPTO_ABYTES;
tag_is_match = crypto_aead_decrypt_asm(m, c, mlen_, ad, adlen, npub, k);
if (tag_is_match != 0)
{
memset(m, 0, (size_t)mlen_);
return -1;
}
*mlen = mlen_;
return 0;
}
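/*
 * Example round trip through the wrappers above (an illustrative
 * sketch, not part of the build; no associated data):
 *
 *   unsigned char k[CRYPTO_KEYBYTES] = {0}, npub[CRYPTO_NPUBBYTES] = {0};
 *   unsigned char m[6] = "hello", c[6 + CRYPTO_ABYTES], out[6];
 *   unsigned long long clen, mlen;
 *   crypto_aead_encrypt(c, &clen, m, sizeof m, NULL, 0, NULL, npub, k);
 *   int ok = crypto_aead_decrypt(out, &mlen, NULL, c, clen, NULL, 0, npub, k);
 *   // ok == 0 and mlen == sizeof m on success; a forged tag yields -1
 *   // and a zeroed out[] buffer.
 */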
;
; **********************************************
; *               PHOTON-Beetle                *
; *  Authenticated Encryption and Hash Family  *
; *                                            *
; * Assembly implementation for 8-bit AVR CPU  *
; *   Version 1.0 2020 by PHOTON-Beetle Team   *
; **********************************************
;
#include "api.h"
#define CRYPTO_AEAD
#define STATE_INBITS 256
#define STATE_INBYTES ((STATE_INBITS + 7) / 8)
#define RATE_INBITS 128
#define RATE_INBYTES ((RATE_INBITS + 7) / 8)
#define RATE_INBYTES_MASK (RATE_INBYTES - 1)
#define TAG_MATCH 0
#define TAG_UNMATCH -1
#define OTHER_FAILURES -2
; For CRYPTO_HASH
#define CRYPTO_BYTES 32
#define INITIAL_RATE_INBITS 128
#define INITIAL_RATE_INBYTES ((INITIAL_RATE_INBITS + 7) / 8)
#define HASH_RATE_INBITS 32
#define HASH_RATE_INBYTES ((HASH_RATE_INBITS + 7) / 8)
#define HASH_RATE_INBYTES_MASK (HASH_RATE_INBYTES - 1)
;
; ============================================
; R E G I S T E R D E F I N I T I O N S
; ============================================
;
; [Add all register names here, include info on
; all used registers without specific names]
; rmp: Multipurpose register
#define rmp r16
#define rate r17
#define mclen r18
#define radlen r19
#define adlen_org r0
#define cnt0 r20
#define cnt1 r21
#define cnt2 r22
#define SQUEEZE_RATE_INBITS 128
#define SQUEEZE_RATE_INBYTES ((SQUEEZE_RATE_INBITS + 7) / 8)
#define SQUEEZE_RATE_MASK (SQUEEZE_RATE_INBYTES - 1)
#define CAPACITY_INBITS (STATE_INBITS - RATE_INBITS)
#define CAPACITY_INBYTES ((CAPACITY_INBITS + 7) / 8)
; For CRYPTO_AEAD
#define KEY_INBITS (CRYPTO_KEYBYTES * 8)
#define KEY_INBYTES (CRYPTO_KEYBYTES)
#define NONCE_INBITS (CRYPTO_NPUBBYTES * 8)
#define NONCE_INBYTES (CRYPTO_NPUBBYTES)
#define TAG_INBITS (CRYPTO_ABYTES * 8)
#define TAG_INBYTES (CRYPTO_ABYTES)
#define t0 r8
#define t1 r9
#define t2 r10
#define t3 r11
#define x0 r12
#define x1 r13
#define x2 r14
#define x3 r15
#define ed r1
#define addr0 r2
#define addr1 r3
#define addr2 r4
#define addr3 r5
#define addr4 r6
#define addr5 r7
; domain_cnt overlaps with cnt0; it is only used temporarily, so it needs no backup
#define domain_cnt r20
#define domain_cnt0 r23
#define domain_cnt1 r24
.section .noinit
SRAM_STATE: .BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
SRAM_MESSAGE_OUT_ADDR: .BYTE 0, 0
SRAM_MESSAGE_IN_ADDR: .BYTE 0, 0
SRAM_MESSAGE_IN_LEN: .BYTE 0, 0
#ifdef CRYPTO_AEAD
; For CRYPTO_AEAD
SRAM_ASSOCIATED_DATA_ADDR: .BYTE 0, 0
SRAM_ADLEN: .BYTE 0, 0
SRAM_NONCE_ADDR: .BYTE 0, 0
SRAM_KEY_ADDR: .BYTE 0, 0
SRAM_ADDITIONAL: .BYTE 0, 0, 0, 0
.BYTE 0, 0, 0, 0
.BYTE 0, 0, 0, 0
.BYTE 0, 0, 0, 0
#endif
; SRAM required in addition to that used for the API
SRAM_PAD: .BYTE 0, 0, 0, 0
#if ((defined(CRYPTO_AEAD) && (RATE_INBYTES > 4)) || defined(CRYPTO_HASH))
.BYTE 0, 0, 0, 0
.BYTE 0, 0, 0, 0
.BYTE 0, 0, 0, 0
#endif
.section .text
#include "assist.h"
#include "photon.h"
AddDomainCounter:
ldi YH, hi8(SRAM_STATE + STATE_INBYTES - 3)
ldi YL, lo8(SRAM_STATE + STATE_INBYTES - 3)
ldi rmp, 0x80
ldi cnt1, 3
check_domain_bit:
ror domain_cnt
brcc no_xor
ld x0, Y
eor x0, rmp
st Y, x0
no_xor:
adiw YL, 1
dec cnt1
brne check_domain_bit
ret
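; Illustrative C model of AddDomainCounter: bit i of the 3-bit domain
; constant flips the top bit of bitsliced state byte STATE_INBYTES-3+i
; (one byte per bit plane).
;
;   for (int i = 0; i < 3; i++)
;       if ((domain_cnt >> i) & 1)
;           state[STATE_INBYTES - 3 + i] ^= 0x80;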
; require XH:XL be the address of the current associated data/message block
XOR_to_State:
ldi YH, hi8(SRAM_STATE)
ldi YL, lo8(SRAM_STATE)
mov cnt0, rate
dec cnt0
XOR_to_State_loop:
rcall Load_Reorder_32_bits
ld rmp, Y
eor rmp, x0
st Y+, rmp
ld rmp, Y
eor rmp, x1
st Y+, rmp
ld rmp, Y
eor rmp, x2
st Y+, rmp
ld rmp, Y
eor rmp, x3
st Y+, rmp
subi cnt0, 4
brsh XOR_to_State_loop
; XH:XL now point to the next associated data/message block (when this is not the last block)
ret
; require XH:XL to point to the source data to be padded
PAD_OneZero:
ldi YH, hi8(SRAM_PAD)
ldi YL, lo8(SRAM_PAD)
mov cnt1, rate
pad_copy:
ld rmp, X+
st Y+, rmp
dec cnt1
dec cnt0
brne pad_copy
pad_one:
ldi rmp, 1
st Y+, rmp
dec cnt1
breq pad_end
clr rmp
pad_zero:
st Y+, rmp
dec cnt1
brne pad_zero
pad_end:
ldi XH, hi8(SRAM_PAD)
ldi XL, lo8(SRAM_PAD)
; XH:XL now point to the last block that needs to be processed
ret
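; Illustrative C model of PAD_OneZero (cnt0 < rate holds at every call
; site):
;
;   memcpy(SRAM_PAD, src, cnt0);              /* partial block        */
;   SRAM_PAD[cnt0] = 0x01;                    /* the 10* padding byte */
;   memset(SRAM_PAD + cnt0 + 1, 0, rate - cnt0 - 1);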
HASH:
movw addr0, XL
hash_block_loop:
rcall PHOTON_Permutation
movw XL, addr0
cp rate, radlen
brsh hash_last_block
rcall XOR_to_State
movw addr0, XL
sub radlen, rate
rjmp hash_block_loop
hash_last_block:
cp radlen, rate
breq hash_xor_domain
mov cnt0, radlen
rcall PAD_OneZero
hash_xor_domain:
clr radlen
rcall XOR_to_State
mov domain_cnt, domain_cnt0
rcall AddDomainCounter
ret
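; Illustrative outline of HASH above (C-style pseudocode; radlen > 0
; holds at every call site):
;
;   while (radlen > rate) {
;       PHOTON_Permutation(); absorb(in, rate); in += rate; radlen -= rate;
;   }
;   PHOTON_Permutation();
;   if (radlen < rate) in = PAD_OneZero(in, radlen);
;   absorb(in, rate);
;   AddDomainCounter(domain_cnt0);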
TAG:
rcall PHOTON_Permutation
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
movw YL, addr2
rcall Load_invReorder_Store_128_bits
ret
#ifdef CRYPTO_AEAD
.IF (RATE_INBITS == 128)
XOR_to_Cipher:
mov t2, rate
cp t2, mclen
brlo XOR_to_Cipher_Start
mov t2, mclen ; back up the real length of the remaining message
XOR_to_Cipher_Start:
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
ldi YH, hi8(SRAM_ADDITIONAL)
ldi YL, lo8(SRAM_ADDITIONAL)
rcall Load_invReorder_Store_128_bits ; copy the state to the additional SRAM buffer, undoing the bitslice reordering
movw XL, addr0
movw ZL, addr2
; XOR Part 2
sbiw YL, (RATE_INBYTES>>1) ; Pointed to Part 2
ldi cnt0, (RATE_INBYTES>>1)
cp cnt0, mclen
brlo XOR_Part2_Store_Cipher_begin
mov cnt0, mclen
XOR_Part2_Store_Cipher_begin:
sub mclen, cnt0
XOR_Part2_Store_Cipher_loop:
ld t0, Y+
ld x0, X+
eor x0, t0
st Z+, x0
dec cnt0
brne XOR_Part2_Store_Cipher_loop
cpi mclen, 1
brlo XOR_to_Cipher_END
; XOR (Part 1 >>> 1)
ldi cnt0, (RATE_INBYTES>>1)
cp cnt0, mclen
brlo XOR_Part1_Store_Cipher_begin
mov cnt0, mclen
XOR_Part1_Store_Cipher_begin:
sub mclen, cnt0
ldi YH, hi8(SRAM_ADDITIONAL)
ldi YL, lo8(SRAM_ADDITIONAL)
ld t0, Y
bst t0, 0
adiw YL, (RATE_INBYTES>>1)-1
ld t0, Y
ror t0
bld t0, 7
st Y, t0
ldi cnt1, (RATE_INBYTES>>1)-1
ROR_part1_loop:
ld t0, -Y
ror t0
st Y, t0
dec cnt1
brne ROR_part1_loop
XOR_Part1_Store_Cipher_loop:
ld t0, Y+
ld x0, X+
eor x0, t0
st Z+, x0
dec cnt0
brne XOR_Part1_Store_Cipher_loop
XOR_to_Cipher_END:
tst ed
brne XOR_to_Cipher_dec
XOR_to_Cipher_enc:
movw XL, addr0
cp t2, rate
brsh XOR_to_Cipher_XOR_to_State
mov cnt0, t2
rcall PAD_OneZero
rjmp XOR_to_Cipher_XOR_to_State
XOR_to_Cipher_dec:
movw XL, addr2
cp t2, rate
brsh XOR_to_Cipher_XOR_to_State
; need to be padded
mov cnt0, t2
rcall PAD_OneZero
XOR_to_Cipher_XOR_to_State:
rcall XOR_to_State
clr rmp
add addr0, t2
adc addr1, rmp
add addr2, t2
adc addr3, rmp
ret
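; In PHOTON-Beetle terms (illustrative summary): with the 16-byte rate
; split as S = S1 || S2, the routine above computes
;   C = M xor shuffle(S),  shuffle(S1 || S2) = S2 || (S1 >>> 1),
; and then absorbs the (padded) plaintext into the state, for both
; encryption and decryption.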
.ELSE
; RATE_INBITS == 32
XOR_to_Cipher:
mov t2, rate
cp t2, mclen
brlo XOR_to_Cipher_Start
mov t2, mclen ; back up the real length of the remaining message
XOR_to_Cipher_Start:
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
ld x0, X+
ld x1, X+
ld x2, X+
ld x3, X+
ldi YH, hi8(SRAM_ADDITIONAL)
ldi YL, lo8(SRAM_ADDITIONAL)
rcall invReorder_Store_32_bits
movw XL, addr0
movw ZL, addr2
; XOR Part 2
sbiw YL, (RATE_INBYTES>>1) ; Pointed to Part 2
ldi cnt0, (RATE_INBYTES>>1)
cp cnt0, mclen
brlo XOR_Part2_Store_Cipher_begin
mov cnt0, mclen
XOR_Part2_Store_Cipher_begin:
sub mclen, cnt0
XOR_Part2_Store_Cipher_loop:
ld t0, Y+
ld x0, X+
eor x0, t0
st Z+, x0
dec cnt0
brne XOR_Part2_Store_Cipher_loop
cpi mclen, 1
brlo XOR_to_Cipher_END
; XOR (Part 1 >>> 1)
ldi cnt0, (RATE_INBYTES>>1)
cp cnt0, mclen
brlo XOR_Part1_Store_Cipher_begin
mov cnt0, mclen
XOR_Part1_Store_Cipher_begin:
sub mclen, cnt0
ldi YH, hi8(SRAM_ADDITIONAL)
ldi YL, lo8(SRAM_ADDITIONAL)
ld t0, Y+
ld t1, Y+
bst t0, 0
ror t1
ror t0
bld t1, 7
ld x0, X+
eor x0, t0
st Z+, x0
dec cnt0
breq XOR_to_Cipher_END
ld x0, X+
eor x0, t1
st Z+, x0
XOR_to_Cipher_END:
tst ed
brne XOR_to_Cipher_dec
XOR_to_Cipher_enc:
movw XL, addr0
cp t2, rate
brsh XOR_to_Cipher_XOR_to_State
mov cnt0, t2
rcall PAD_OneZero
rjmp XOR_to_Cipher_XOR_to_State
XOR_to_Cipher_dec:
movw XL, addr2
cp t2, rate
brsh XOR_to_Cipher_XOR_to_State
; need to be padded
mov cnt0, t2
rcall PAD_OneZero
XOR_to_Cipher_XOR_to_State:
rcall XOR_to_State
clr rmp
add addr0, t2
adc addr1, rmp
add addr2, t2
adc addr3, rmp
ret
.ENDIF
ENC:
lds ZH, SRAM_MESSAGE_OUT_ADDR
lds ZL, SRAM_MESSAGE_OUT_ADDR + 1
lds XH, SRAM_MESSAGE_IN_ADDR
lds XL, SRAM_MESSAGE_IN_ADDR + 1
movw addr0, XL
movw addr2, ZL
enc_block_loop:
rcall PHOTON_Permutation
rcall XOR_to_Cipher
cpi mclen, 1
brsh enc_block_loop
mov domain_cnt, domain_cnt1
rcall AddDomainCounter
ret
AUTH_AND_ENCDEC:
ldi YH, hi8(SRAM_STATE)
ldi YL, lo8(SRAM_STATE)
lds XH, SRAM_NONCE_ADDR
lds XL, SRAM_NONCE_ADDR + 1
rcall Load_Reorder_Store_128_bits
lds XH, SRAM_KEY_ADDR
lds XL, SRAM_KEY_ADDR + 1
rcall Load_Reorder_Store_128_bits
ldi domain_cnt0, 1
ldi domain_cnt1, 1
test_adlen_zero:
tst radlen
breq adlen_zero_test_mlen_zero
; radlen != 0
adlen_nzero_test_mlen_zero:
tst mclen
brne test_adlen_divisible
ldi domain_cnt0, 3
test_adlen_divisible:
mov rmp, radlen
andi rmp, RATE_INBYTES_MASK
breq hash_ad
inc domain_cnt0 ; 2 or 4
hash_ad:
lds XH, SRAM_ASSOCIATED_DATA_ADDR
lds XL, SRAM_ASSOCIATED_DATA_ADDR + 1
rcall HASH
tst mclen
breq mlen_zero_inputout_address
rjmp test_mlen_divisible
adlen_zero_test_mlen_zero:
ldi domain_cnt1, 5
tst mclen
breq adlen_zero_mlen_zero
; mclen != 0
test_mlen_divisible:
mov rmp, mclen
andi rmp, RATE_INBYTES_MASK
breq enc_dec_m
inc domain_cnt1 ; 2 or 6
enc_dec_m:
rcall ENC
rjmp AUTH_AND_ENCDEC_end
adlen_zero_mlen_zero:
; empty message and empty associated data
ldi YH, hi8(SRAM_STATE + STATE_INBYTES - 3)
ldi YL, lo8(SRAM_STATE + STATE_INBYTES - 3)
ld x0, Y
ldi rmp, 0x80
eor x0, rmp
st Y, x0
mlen_zero_inputout_address:
tst ed
brne dec_inputout_address
enc_inputout_address:
lds ZH, SRAM_MESSAGE_OUT_ADDR
lds ZL, SRAM_MESSAGE_OUT_ADDR + 1
movw addr2, ZL
rjmp AUTH_AND_ENCDEC_end
dec_inputout_address:
lds ZH, SRAM_MESSAGE_IN_ADDR
lds ZL, SRAM_MESSAGE_IN_ADDR + 1
movw addr0, ZL
AUTH_AND_ENCDEC_end:
ret
; void crypto_aead_encrypt_asm(
; unsigned char *c,
; const unsigned char *m,
; unsigned long long mlen,
; const unsigned char *ad,
; unsigned long long radlen,
; const unsigned char *npub,
; const unsigned char *k
; )
;
; unsigned char *c, is passed in r24:r25
; const unsigned char *m, is passed in r22:r23
; unsigned long long mlen, is passed in r20:r21, only LSB (r20) is used
; const unsigned char *ad, is passed in r18:r19
; unsigned long long radlen, is passed in r16:r17, only LSB (r16) is used
; const unsigned char *npub, is passed in r14:r15
; const unsigned char *k is passed in r12:r13
.global crypto_aead_encrypt_asm
crypto_aead_encrypt_asm:
PUSH_ALL
ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
st X+, r25 ;store cipher address in SRAM_MESSAGE_OUT_ADDR
st X+, r24
st X+, r23 ;store message address in SRAM_MESSAGE_IN_ADDR
st X+, r22
st X+, r21 ;store message length in SRAM_MESSAGE_IN_LEN
st X+, r20
st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR
st X+, r18
st X+, r17 ;store associated data length in SRAM_ADLEN
st X+, r16
st X+, r15 ;store nonce address in SRAM_NONCE_ADDR
st X+, r14
st X+, r13 ;store key address in SRAM_KEY_ADDR
st X+, r12
mov mclen, r20
mov radlen, r16
ldi rate, RATE_INBYTES
clr ed
rcall AUTH_AND_ENCDEC
rcall TAG
POP_ALL
ret
; int crypto_aead_decrypt_asm(
; unsigned char *m,
; const unsigned char *c,
; unsigned long long clen,
; const unsigned char *ad,
; unsigned long long radlen,
; const unsigned char *npub,
; const unsigned char *k
; )
;
; unsigned char *m, is passed in r24:r25
; const unsigned char *c, is passed in r22:r23
; unsigned long long clen, is passed in r20:r21, only LSB (r20) is used
; const unsigned char *ad, is passed in r18:r19
; unsigned long long radlen, is passed in r16:r17, only LSB (r16) is used
; const unsigned char *npub, is passed in r14:r15
; const unsigned char *k is passed in r12:r13
.global crypto_aead_decrypt_asm
crypto_aead_decrypt_asm:
PUSH_ALL
ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
st X+, r25 ;store message address in SRAM_MESSAGE_OUT_ADDR
st X+, r24
st X+, r23 ;store cipher address in SRAM_MESSAGE_IN_ADDR
st X+, r22
st X+, r21 ;store cipher length in SRAM_MESSAGE_IN_LEN
st X+, r20
st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR
st X+, r18
st X+, r17 ;store associated data length in SRAM_ADLEN
st X+, r16
st X+, r15 ;store nonce address in SRAM_NONCE_ADDR
st X+, r14
st X+, r13 ;store key address in SRAM_KEY_ADDR
st X+, r12
mov mclen, r20
mov radlen, r16
ldi rate, RATE_INBYTES
clr ed
inc ed
rcall AUTH_AND_ENCDEC
ldi YH, hi8(SRAM_ADDITIONAL)
ldi YL, lo8(SRAM_ADDITIONAL)
movw addr2, YL
rcall TAG
sbiw YL, CRYPTO_ABYTES
movw XL, addr0
ldi cnt0, CRYPTO_ABYTES
compare_tag:
ld t0, Y+
ld x0, X+
cp t0, x0
brne return_tag_not_match
dec cnt0
brne compare_tag
rjmp return_tag_match
return_tag_not_match:
ldi r25, 0xFF
ldi r24, 0xFF
rjmp crypto_aead_decrypt_end
return_tag_match:
clr r25
clr r24
crypto_aead_decrypt_end:
POP_ALL
ret
; #ifdef CRYPTO_AEAD
#endif
#ifdef CRYPTO_HASH
; void crypto_hash_asm(
; unsigned char *out,
; const unsigned char *in,
; unsigned long long inlen
; )
;
; unsigned char *out, is passed in r24:r25
; const unsigned char *in, is passed in r22:r23
; unsigned long long inlen, is passed in r20:r21, only LSB (r20) is used
.global crypto_hash_asm
crypto_hash_asm:
PUSH_ALL
ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
st X+, r25 ;store digest address in SRAM_MESSAGE_OUT_ADDR
st X+, r24
st X+, r23 ;store input address in SRAM_MESSAGE_IN_ADDR
st X+, r22
st X+, r21 ;store input length in SRAM_MESSAGE_IN_LEN
st X+, r20
mov mclen, r20
; zero the second half of the state
ldi YH, hi8(SRAM_STATE + INITIAL_RATE_INBYTES)
ldi YL, lo8(SRAM_STATE + INITIAL_RATE_INBYTES)
clr rmp
ldi cnt1, (STATE_INBYTES - INITIAL_RATE_INBYTES)
zero_state:
st Y+, rmp
dec cnt1
brne zero_state
ldi domain_cnt0, 1
sbiw YL, (STATE_INBYTES - INITIAL_RATE_INBYTES)
lds XH, SRAM_MESSAGE_IN_ADDR
lds XL, SRAM_MESSAGE_IN_ADDR + 1
tst mclen
breq add_domain
test_mlen_initrate:
; mclen != 0
cpi mclen, INITIAL_RATE_INBYTES
brlo less_than_initial_rate
breq equal_to_initial_rate
more_than_initial_rate:
rcall Load_Reorder_Store_128_bits
ldi rate, HASH_RATE_INBYTES
mov radlen, mclen
subi radlen, INITIAL_RATE_INBYTES
mov rmp, radlen
andi rmp, HASH_RATE_INBYTES_MASK
breq hash_message
inc domain_cnt0
hash_message:
rcall HASH
rjmp gen_digest
equal_to_initial_rate:
inc domain_cnt0
rcall Load_Reorder_Store_128_bits
rjmp add_domain
less_than_initial_rate:
mov cnt0, mclen
ldi rate, INITIAL_RATE_INBYTES
rcall PAD_OneZero
ldi YH, hi8(SRAM_STATE)
ldi YL, lo8(SRAM_STATE)
rcall Load_Reorder_Store_128_bits
rjmp add_domain
add_domain:
mov domain_cnt, domain_cnt0
rcall AddDomainCounter
gen_digest:
lds XH, SRAM_MESSAGE_OUT_ADDR
lds XL, SRAM_MESSAGE_OUT_ADDR + 1
movw addr2, XL
rcall TAG
movw XL, addr2
adiw XL, SQUEEZE_RATE_INBYTES
movw addr2, XL
rcall TAG
POP_ALL
ret
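; Digest outline for crypto_hash_asm above (illustrative): absorb the
; first 16 input bytes at the 128-bit initial rate (padded if shorter),
; absorb any remainder in 4-byte blocks via HASH, add the domain
; constant, then squeeze two 16-byte halves (one permutation each, via
; TAG) to form the 32-byte digest.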
#endif
;
; **********************************************
; *               PHOTON-Beetle                *
; *  Authenticated Encryption and Hash Family  *
; *                                            *
; * Assembly implementation for 8-bit AVR CPU  *
; *   Version 1.0 2020 by PHOTON-Beetle Team   *
; **********************************************
;
#define ROUND_N 12
#define DIM 8
.MACRO Store_OneRow
st X+, x0
st X+, x1
st X+, x2
st X+, x3
.ENDM
.MACRO ROTL_1 i0
bst \i0, 7
lsl \i0
bld \i0, 0
.ENDM
.MACRO ROTR_1 i0
bst \i0, 0
lsr \i0
bld \i0, 7
.ENDM
.MACRO ROTR_4 i0
swap \i0
.ENDM
ROTR_1_ROW:
ROTR_1 x0
ROTR_1 x1
ROTR_1 x2
ROTR_1 x3
ret
ROTL_1_ROW:
ROTL_1 x0
ROTL_1 x1
ROTL_1 x2
ROTL_1 x3
ret
ROTR_4_ROW:
ROTR_4 x0
ROTR_4 x1
ROTR_4 x2
ROTR_4 x3
ret
RoundFunction:
rjmp AddRC_Sbox_ShiftRow_Start
ShiftRow_routine_table:
rjmp ShiftRow_RecoverZ_NoLPM
rjmp ShiftRow_1
rjmp ShiftRow_2
rjmp ShiftRow_3
rjmp ShiftRow_4
rjmp ShiftRow_5
rjmp ShiftRow_6
rjmp ShiftRow_7
ShiftRow_1:
rcall ROTR_1_ROW
rjmp ShiftRow_RecoverZ_LPM
ShiftRow_2:
rcall ROTR_1_ROW
rcall ROTR_1_ROW
rjmp ShiftRow_RecoverZ_NoLPM
ShiftRow_3:
rcall ROTR_4_ROW
rcall ROTL_1_ROW
rjmp ShiftRow_RecoverZ_LPM
ShiftRow_4:
rcall ROTR_4_ROW
rjmp ShiftRow_RecoverZ_NoLPM
ShiftRow_5:
rcall ROTR_4_ROW
rcall ROTR_1_ROW
rjmp ShiftRow_RecoverZ_LPM
ShiftRow_6:
rcall ROTL_1_ROW
rcall ROTL_1_ROW
rjmp ShiftRow_RecoverZ_NoLPM
ShiftRow_7:
rcall ROTL_1_ROW
rjmp ShiftRow_RecoverZ_NoLPM
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Start AddRC_Sbox_ShiftRow
AddRC_Sbox_ShiftRow_Start:
clr t3
inc t3
ldi XL, lo8(SRAM_STATE)
ldi XH, hi8(SRAM_STATE)
ldi YL, pm_lo8(ShiftRow_routine_table)
ldi YH, pm_hi8(ShiftRow_routine_table)
ldi rmp, DIM
lpm t0, Z+ ; Load two nibbles of round constant for rows 0 and 1
AddRC_Sbox_ShiftRow_Loop:
; AddRC_TwoRows
ld x0, X+
ld x1, X+
ld x2, X+
ld x3, X+
sbiw XL, 4
ror t0
brcc next1
eor x0, t3
next1:
ror t0
brcc next2
eor x1, t3
next2:
ror t0
brcc next3
eor x2, t3
next3:
ror t0
brcc next4
eor x3, t3
next4:
; Sbox_TwoRows
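; Bitsliced PHOTON S-box applied to eight 4-bit cells in parallel;
; x0..x3 hold bit planes 0..3. Straight-line C transliteration of the
; instructions below (illustrative):
;
;   x1 ^= x2;  t1 = x2 & x1;  x3 ^= t1;
;   t1 = x3;   x3 &= x1;      x3 ^= x2;
;   t2 = x3;   x3 ^= x0;      x3 = ~x3;  x2 = x3;
;   t2 |= x0;  x0 ^= t1;      x1 ^= x0;
;   x2 |= x1;  x2 ^= t1;      x1 ^= t2;  x3 ^= x1;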
eor x1, x2
mov t1, x2
and t1, x1
eor x3, t1
mov t1, x3
and x3, x1
eor x3, x2
mov t2, x3
eor x3, x0
com x3
mov x2, x3
or t2, x0
eor x0, t1
eor x1, x0
or x2, x1
eor x2, t1
eor x1, t2
eor x3, x1
movw cnt0, ZL
movw ZL, YL
ijmp
ShiftRow_RecoverZ_NoLPM:
movw ZL, cnt0
rjmp ShiftRow_STORE_ROW
ShiftRow_RecoverZ_LPM:
movw ZL, cnt0
lpm t0, Z+ ; Load two nibbles of round constant for rows 2i and 2i+1
ShiftRow_STORE_ROW:
Store_OneRow
adiw YL, 1
dec rmp
brne AddRC_Sbox_ShiftRow_Loop
;;;;;;;;;;;;;;;;;;;;;;;; MixColumn Subroutines
rjmp MC_Start
mul_routine_table:
rjmp mul2_GF16_0x13_xor
rjmp mul4_GF16_0x13_xor
rjmp mul2_GF16_0x13_xor
rjmp mulb_GF16_0x13_xor
rjmp mul2_GF16_0x13_xor
rjmp mul8_GF16_0x13_xor
rjmp mul5_GF16_0x13_xor
rjmp mul6_GF16_0x13_xor
; For all mulN_GF16_0x13_xor routines:
; Input
; MSB........LSB
; x0=@0: x1=@1: x2=@2: x3=@3
mul2_GF16_0x13_xor:
; # define mul2_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \
; x3 = XOR (x3 ,x0); \
; } while (0) ; /* Output : ( MSB ) x1 ,x2 ,x3 , x0 ( LSB ) */
eor t3, t0
eor x0, t0
eor x1, t3
eor x2, t2
eor x3, t1
rjmp MC_INC_CNT1
mul4_GF16_0x13_xor:
; # define mul4_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \
; x3 = XOR (x3 ,x0); x0 = XOR (x0 ,x1); \
; } while (0) ; /* Output : ( MSB ) x2 ,x3 ,x0 , x1 ( LSB ) */
eor t3, t0
eor t0, t1
eor x0, t1
eor x1, t0
eor x2, t3
eor x3, t2
rjmp MC_INC_CNT1
mul5_GF16_0x13_xor:
; # define mul5_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \
; x2 = XOR (x2 ,x0); x3 = XOR (x3 ,x1); \
; x1 = XOR (x1 ,x2); x0 = XOR (x0 ,x3); \
; } while (0) ; /* Output : ( MSB ) x2 ,x0 ,x1 , x3 ( LSB ) */
eor t2, t0
eor t3, t1
eor t1, t2
eor t0, t3
eor x0, t3
eor x1, t1
eor x2, t0
eor x3, t2
rjmp MC_INC_CNT1
mul6_GF16_0x13_xor:
; # define mul6_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \
; x3 = XOR (x3 ,x1); x1 = XOR (x1 ,x0); \
; x2 = XOR (x2 ,x1); x0 = XOR (x0 ,x2); \
; x2 = XOR (x2 ,x3); \
; } while (0) ; /* Output : ( MSB ) x0 ,x2 ,x3 , x1 ( LSB ) */
eor t3, t1
eor t1, t0
eor t2, t1
eor t0, t2
eor t2, t3
eor x0, t1
eor x1, t3
eor x2, t2
eor x3, t0
rjmp MC_STORE_ROW
mul8_GF16_0x13_xor:
; # define mul8_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \
; x3 = XOR (x3 ,x0); x0 = XOR (x0 ,x1); \
; x1 = XOR (x1 ,x2); \
; } while (0) ; /* Output : ( MSB ) x3 ,x0 ,x1 , x2 ( LSB ) */
eor t3, t0
eor t0, t1
eor t1, t2
eor x0, t2
eor x1, t1
eor x2, t0
eor x3, t3
rjmp MC_INC_CNT1
mulb_GF16_0x13_xor:
; # define mul11_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \
; x2 = XOR (x2 ,x0); x1 = XOR (x1 ,x3); \
; x0 = XOR (x0 ,x1); x3 = XOR (x3 ,x2); \
; } while (0) ; /* Output : ( MSB ) x1 ,x2 ,x0 , x3 ( LSB ) */
eor t2, t0
eor t1, t3
eor t0, t1
eor t3, t2
eor x0, t3
eor x1, t0
eor x2, t2
eor x3, t1
rjmp MC_INC_CNT1
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Start MixColumns
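; MixColumnsSerial (illustrative note): the state is multiplied by A^8,
; where A = Serial(2, 4, 2, 11, 2, 8, 5, 6) over GF(2^4) with reduction
; polynomial x^4 + x + 1 (0x13, hence the routine names); each output
; row is accumulated into x0..x3 via the table above and stored back in
; place.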
MC_Start:
movw addr4, ZL
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
movw YL, XL
clr cnt0
clr cnt1
A1:
mov cnt1, cnt0
clr x0
clr x1
clr x2
clr x3
ldi ZH, pm_hi8(mul_routine_table)
ldi ZL, pm_lo8(mul_routine_table)
MC_MUL_LOOP:
ld t3, X+
ld t2, X+
ld t1, X+
ld t0, X+
ijmp
MC_INC_CNT1:
inc cnt1
cpi cnt1, DIM
brne MC_MUL_NEXT
clr cnt1
movw XL, YL
MC_MUL_NEXT:
adiw ZL, 1
rjmp MC_MUL_LOOP
MC_STORE_ROW:
cpi cnt0, 0
brne MC_STORE_DIRECT
sbiw XL, STATE_INBYTES
MC_STORE_DIRECT:
Store_OneRow
inc cnt0
cpi cnt0, DIM
brne A1
movw ZL, addr4
ret
PHOTON_Permutation:
ldi ZH, hi8(RC)
ldi ZL, lo8(RC)
ldi cnt2, ROUND_N
round_loop_start:
rcall RoundFunction
dec cnt2
brne round_loop_start
ret
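; Round structure (illustrative): each of the 12 rounds applies
;   AddConstant -> SubCells -> ShiftRows -> MixColumnsSerial,
; with the first three steps fused in RoundFunction's row loop and
; MixColumnsSerial performed by the MC_* code.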
.section .text
RC:
.byte 0x01,0x62,0xFE,0x9D
.byte 0x23,0x40,0xDC,0xBF
.byte 0x67,0x04,0x98,0xFB
.byte 0xFE,0x9D,0x01,0x62
.byte 0xCD,0xAE,0x32,0x51
.byte 0xAB,0xC8,0x54,0x37
.byte 0x76,0x15,0x89,0xEA
.byte 0xDC,0xBF,0x23,0x40
.byte 0x89,0xEA,0x76,0x15
.byte 0x32,0x51,0xCD,0xAE
.byte 0x45,0x26,0xBA,0xD9
.byte 0xBA,0xD9,0x45,0x26
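; RC packing: four bytes per round, twelve rounds. Byte j of round r
; packs the constants for rows 2j (low nibble) and 2j+1 (high nibble),
; each equal to rc[r] ^ ic[row] with ic = {0,1,3,7,15,14,12,8} and rc
; the LFSR sequence 1,3,7,14,13,11,6,12,9,2,5,10.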
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
;
; **********************************************
; *               PHOTON-Beetle                *
; *  Authenticated Encryption and Hash Family  *
; *                                            *
; * Assembly implementation for 8-bit AVR CPU  *
; *   Version 1.0 2020 by PHOTON-Beetle Team   *
; **********************************************
;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Bitslice
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
.MACRO Reorder_8_bits i0, i1, i2, i3, i4
ror \i0
ror \i1
ror \i0
ror \i2
ror \i0
ror \i3
ror \i0
ror \i4
ror \i0
ror \i1
ror \i0
ror \i2
ror \i0
ror \i3
ror \i0
ror \i4
.ENDM
.MACRO InvReorder_8_bits i0, i1, i2, i3, i4
ror \i1
ror \i0
ror \i2
ror \i0
ror \i3
ror \i0
ror \i4
ror \i0
ror \i1
ror \i0
ror \i2
ror \i0
ror \i3
ror \i0
ror \i4
ror \i0
.ENDM
; require XH:XL be the address of the input
Load_Reorder_32_bits:
ldi cnt1, 4
reorder_8_bits_loop:
ld rmp, X+
Reorder_8_bits rmp, x0, x1, x2, x3
dec cnt1
brne reorder_8_bits_loop
ret
; require YH:YL be the address of the output
invReorder_Store_32_bits:
ldi cnt1, 4
invreorder_8_bits_loop:
InvReorder_8_bits rmp, x0, x1, x2, x3
st Y+, rmp
dec cnt1
brne invreorder_8_bits_loop
ret
; require XH:XL be the address of the input
; require YH:YL be the address of the output
Load_Reorder_Store_128_bits:
ldi cnt0, 4
reorder_32_bits_loop:
rcall Load_Reorder_32_bits
st Y+, x0
st Y+, x1
st Y+, x2
st Y+, x3
dec cnt0
brne reorder_32_bits_loop
ret
; require XH:XL be the address of the input
; require YH:YL be the address of the output
Load_invReorder_Store_128_bits:
ldi cnt0, 4
invreorder_32_bits_loop:
ld x0, X+
ld x1, X+
ld x2, X+
ld x3, X+
rcall invReorder_Store_32_bits
dec cnt0
brne invreorder_32_bits_loop
ret
.macro PUSH_ALL
push r2
push r3
push r4
push r5
push r6
push r7
push r8
push r9
push r10
push r11
push r12
push r13
push r14
push r15
push r16
push r17
push r28
push r29
.endm
.macro POP_ALL
pop r29
pop r28
pop r17
pop r16
pop r15
pop r14
pop r13
pop r12
pop r11
pop r10
pop r9
pop r8
pop r7
pop r6
pop r5
pop r4
pop r3
pop r2
clr r1
.endm
#ifdef __cplusplus
extern "C" {
#endif
int crypto_aead_encrypt(
unsigned char *c,unsigned long long *clen,
const unsigned char *m,unsigned long long mlen,
const unsigned char *ad,unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
);
int crypto_aead_decrypt(
unsigned char *m,unsigned long long *outputmlen,
unsigned char *nsec,
const unsigned char *c,unsigned long long clen,
const unsigned char *ad,unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
);
#ifdef __cplusplus
}
#endif
#include <avr/io.h>
#include <avr/sfr_defs.h>
#include <stdlib.h>
#include <string.h>
#include "api.h"
extern void crypto_aead_encrypt_asm(
unsigned char *c,
const unsigned char *m,
unsigned char mlen,
const unsigned char *ad,
unsigned char adlen,
const unsigned char *npub,
const unsigned char *k
);
extern char crypto_aead_decrypt_asm(
unsigned char *m,
const unsigned char *c,
unsigned char clen,
const unsigned char *ad,
unsigned char adlen,
const unsigned char *npub,
const unsigned char *k
);
extern void crypto_hash_asm(
unsigned char *out,
const unsigned char *in,
unsigned char inlen
);
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
)
{
/*
...
... the code for the cipher implementation goes here,
... generating a ciphertext c[0],c[1],...,c[*clen-1]
... from a plaintext m[0],m[1],...,m[mlen-1]
... and associated data ad[0],ad[1],...,ad[adlen-1]
... and nonce npub[0],npub[1],..
... and secret key k[0],k[1],...
... the implementation shall not use nsec
...
... return 0;
*/
(void)nsec;
crypto_aead_encrypt_asm(c, m, mlen, ad, adlen, npub, k);
*clen = mlen + CRYPTO_ABYTES;
return 0;
}
int crypto_aead_decrypt(
unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
)
{
/*
...
... the code for the AEAD implementation goes here,
... generating a plaintext m[0],m[1],...,m[*mlen-1]
... and secret message number nsec[0],nsec[1],...
... from a ciphertext c[0],c[1],...,c[clen-1]
... and associated data ad[0],ad[1],...,ad[adlen-1]
... and nonce number npub[0],npub[1],...
... and secret key k[0],k[1],...
...
... return 0;
*/
unsigned long long mlen_;
char tag_is_match;
(void)nsec;
if (clen < CRYPTO_ABYTES) {
return -1;
}
mlen_ = clen - CRYPTO_ABYTES;
tag_is_match = crypto_aead_decrypt_asm(m, c, mlen_, ad, adlen, npub, k);
if (tag_is_match != 0)
{
memset(m, 0, (size_t)mlen_);
return -1;
}
*mlen = mlen_;
return 0;
}
;
; **********************************************
; *               PHOTON-Beetle                *
; *  Authenticated Encryption and Hash Family  *
; *                                            *
; * Assembly implementation for 8-bit AVR CPU  *
; *   Version 1.0 2020 by PHOTON-Beetle Team   *
; **********************************************
;
#include "api.h"
#define CRYPTO_AEAD
#define STATE_INBITS 256
#define STATE_INBYTES ((STATE_INBITS + 7) / 8)
#define RATE_INBITS 32
#define RATE_INBYTES ((RATE_INBITS + 7) / 8)
#define RATE_INBYTES_MASK (RATE_INBYTES - 1)
#define TAG_MATCH 0
#define TAG_UNMATCH -1
#define OTHER_FAILURES -2
; For CRYPTO_HASH
#define CRYPTO_BYTES 32
#define INITIAL_RATE_INBITS 128
#define INITIAL_RATE_INBYTES ((INITIAL_RATE_INBITS + 7) / 8)
#define HASH_RATE_INBITS 32
#define HASH_RATE_INBYTES ((HASH_RATE_INBITS + 7) / 8)
#define HASH_RATE_INBYTES_MASK (HASH_RATE_INBYTES - 1)
;
; ============================================
; R E G I S T E R D E F I N I T I O N S
; ============================================
;
; [Add all register names here, include info on
; all used registers without specific names]
; rmp: Multipurpose register
#define rmp r16
#define rate r17
#define mclen r18
#define radlen r19
#define adlen_org r0
#define cnt0 r20
#define cnt1 r21
#define cnt2 r22
#define SQUEEZE_RATE_INBITS 128
#define SQUEEZE_RATE_INBYTES ((SQUEEZE_RATE_INBITS + 7) / 8)
#define SQUEEZE_RATE_MASK (SQUEEZE_RATE_INBYTES - 1)
#define CAPACITY_INBITS (STATE_INBITS - RATE_INBITS)
#define CAPACITY_INBYTES ((CAPACITY_INBITS + 7) / 8)
; For CRYPTO_AEAD
#define KEY_INBITS (CRYPTO_KEYBYTES * 8)
#define KEY_INBYTES (CRYPTO_KEYBYTES)
#define NONCE_INBITS (CRYPTO_NPUBBYTES * 8)
#define NONCE_INBYTES (CRYPTO_NPUBBYTES)
#define TAG_INBITS (CRYPTO_ABYTES * 8)
#define TAG_INBYTES (CRYPTO_ABYTES)
#define t0 r8
#define t1 r9
#define t2 r10
#define t3 r11
#define x0 r12
#define x1 r13
#define x2 r14
#define x3 r15
#define ed r1
#define addr0 r2
#define addr1 r3
#define addr2 r4
#define addr3 r5
#define addr4 r6
#define addr5 r7
; domain_cnt overlaps with cnt0; it is only used temporarily, so it needs no backup
#define domain_cnt r20
#define domain_cnt0 r23
#define domain_cnt1 r24
.section .noinit
SRAM_STATE: .BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
SRAM_MESSAGE_OUT_ADDR: .BYTE 0, 0
SRAM_MESSAGE_IN_ADDR: .BYTE 0, 0
SRAM_MESSAGE_IN_LEN: .BYTE 0, 0
#ifdef CRYPTO_AEAD
; For CRYPTO_AEAD
SRAM_ASSOCIATED_DATA_ADDR: .BYTE 0, 0
SRAM_ADLEN: .BYTE 0, 0
SRAM_NONCE_ADDR: .BYTE 0, 0
SRAM_KEY_ADDR: .BYTE 0, 0
SRAM_ADDITIONAL: .BYTE 0, 0, 0, 0
.BYTE 0, 0, 0, 0
.BYTE 0, 0, 0, 0
.BYTE 0, 0, 0, 0
#endif
; SRAM required in addition to that used for the API
SRAM_PAD: .BYTE 0, 0, 0, 0
#if ((defined(CRYPTO_AEAD) && (RATE_INBYTES > 4)) || defined(CRYPTO_HASH))
.BYTE 0, 0, 0, 0
.BYTE 0, 0, 0, 0
.BYTE 0, 0, 0, 0
#endif
.section .text
#include "assist.h"
#include "photon.h"
AddDomainCounter:
ldi YH, hi8(SRAM_STATE + STATE_INBYTES - 3)
ldi YL, lo8(SRAM_STATE + STATE_INBYTES - 3)
ldi rmp, 0x80
ldi cnt1, 3
check_domain_bit:
ror domain_cnt
brcc no_xor
ld x0, Y
eor x0, rmp
st Y, x0
no_xor:
adiw YL, 1
dec cnt1
brne check_domain_bit
ret
; require XH:XL be the address of the current associated data/message block
XOR_to_State:
ldi YH, hi8(SRAM_STATE)
ldi YL, lo8(SRAM_STATE)
mov cnt0, rate
dec cnt0
XOR_to_State_loop:
rcall Load_Reorder_32_bits
ld rmp, Y
eor rmp, x0
st Y+, rmp
ld rmp, Y
eor rmp, x1
st Y+, rmp
ld rmp, Y
eor rmp, x2
st Y+, rmp
ld rmp, Y
eor rmp, x3
st Y+, rmp
subi cnt0, 4
brsh XOR_to_State_loop
; XH:XL now point to the next associated data/message block (when this is not the last block)
ret
; require XH:XL to point to the source data to be padded
PAD_OneZero:
ldi YH, hi8(SRAM_PAD)
ldi YL, lo8(SRAM_PAD)
mov cnt1, rate
pad_copy:
ld rmp, X+
st Y+, rmp
dec cnt1
dec cnt0
brne pad_copy
pad_one:
ldi rmp, 1
st Y+, rmp
dec cnt1
breq pad_end
clr rmp
pad_zero:
st Y+, rmp
dec cnt1
brne pad_zero
pad_end:
ldi XH, hi8(SRAM_PAD)
ldi XL, lo8(SRAM_PAD)
; XH:XL now point to the last block that needs to be processed
ret
HASH:
movw addr0, XL
hash_block_loop:
rcall PHOTON_Permutation
movw XL, addr0
cp rate, radlen
brsh hash_last_block
rcall XOR_to_State
movw addr0, XL
sub radlen, rate
rjmp hash_block_loop
hash_last_block:
cp radlen, rate
breq hash_xor_domain
mov cnt0, radlen
rcall PAD_OneZero
hash_xor_domain:
clr radlen
rcall XOR_to_State
mov domain_cnt, domain_cnt0
rcall AddDomainCounter
ret
TAG:
rcall PHOTON_Permutation
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
movw YL, addr2
rcall Load_invReorder_Store_128_bits
ret
#ifdef CRYPTO_AEAD
.IF (RATE_INBITS == 128)
XOR_to_Cipher:
mov t2, rate
cp t2, mclen
brlo XOR_to_Cipher_Start
mov t2, mclen ; back up the real length of the remaining message
XOR_to_Cipher_Start:
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
ldi YH, hi8(SRAM_ADDITIONAL)
ldi YL, lo8(SRAM_ADDITIONAL)
rcall Load_invReorder_Store_128_bits ; copy the state to the additional SRAM buffer, undoing the bitslice reordering
movw XL, addr0
movw ZL, addr2
; XOR Part 2
sbiw YL, (RATE_INBYTES>>1) ; Pointed to Part 2
ldi cnt0, (RATE_INBYTES>>1)
cp cnt0, mclen
brlo XOR_Part2_Store_Cipher_begin
mov cnt0, mclen
XOR_Part2_Store_Cipher_begin:
sub mclen, cnt0
XOR_Part2_Store_Cipher_loop:
ld t0, Y+
ld x0, X+
eor x0, t0
st Z+, x0
dec cnt0
brne XOR_Part2_Store_Cipher_loop
cpi mclen, 1
brlo XOR_to_Cipher_END
; XOR (Part 1 >>> 1)
ldi cnt0, (RATE_INBYTES>>1)
cp cnt0, mclen
brlo XOR_Part1_Store_Cipher_begin
mov cnt0, mclen
XOR_Part1_Store_Cipher_begin:
sub mclen, cnt0
ldi YH, hi8(SRAM_ADDITIONAL)
ldi YL, lo8(SRAM_ADDITIONAL)
ld t0, Y
bst t0, 0
adiw YL, (RATE_INBYTES>>1)-1
ld t0, Y
ror t0
bld t0, 7
st Y, t0
ldi cnt1, (RATE_INBYTES>>1)-1
ROR_part1_loop:
ld t0, -Y
ror t0
st Y, t0
dec cnt1
brne ROR_part1_loop
XOR_Part1_Store_Cipher_loop:
ld t0, Y+
ld x0, X+
eor x0, t0
st Z+, x0
dec cnt0
brne XOR_Part1_Store_Cipher_loop
XOR_to_Cipher_END:
tst ed
brne XOR_to_Cipher_dec
XOR_to_Cipher_enc:
movw XL, addr0
cp t2, rate
brsh XOR_to_Cipher_XOR_to_State
mov cnt0, t2
rcall PAD_OneZero
rjmp XOR_to_Cipher_XOR_to_State
XOR_to_Cipher_dec:
movw XL, addr2
cp t2, rate
brsh XOR_to_Cipher_XOR_to_State
; need to be padded
mov cnt0, t2
rcall PAD_OneZero
XOR_to_Cipher_XOR_to_State:
rcall XOR_to_State
clr rmp
add addr0, t2
adc addr1, rmp
add addr2, t2
adc addr3, rmp
ret
.ELSE
; RATE_INBITS == 32
XOR_to_Cipher:
mov t2, rate
cp t2, mclen
brlo XOR_to_Cipher_Start
mov t2, mclen ; back up the real length of the remaining message
XOR_to_Cipher_Start:
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
ld x0, X+
ld x1, X+
ld x2, X+
ld x3, X+
ldi YH, hi8(SRAM_ADDITIONAL)
ldi YL, lo8(SRAM_ADDITIONAL)
rcall invReorder_Store_32_bits
movw XL, addr0
movw ZL, addr2
; XOR Part 2
sbiw YL, (RATE_INBYTES>>1) ; Pointed to Part 2
ldi cnt0, (RATE_INBYTES>>1)
cp cnt0, mclen
brlo XOR_Part2_Store_Cipher_begin
mov cnt0, mclen
XOR_Part2_Store_Cipher_begin:
sub mclen, cnt0
XOR_Part2_Store_Cipher_loop:
ld t0, Y+
ld x0, X+
eor x0, t0
st Z+, x0
dec cnt0
brne XOR_Part2_Store_Cipher_loop
cpi mclen, 1
brlo XOR_to_Cipher_END
; XOR (Part 1 >>> 1)
ldi cnt0, (RATE_INBYTES>>1)
cp cnt0, mclen
brlo XOR_Part1_Store_Cipher_begin
mov cnt0, mclen
XOR_Part1_Store_Cipher_begin:
sub mclen, cnt0
ldi YH, hi8(SRAM_ADDITIONAL)
ldi YL, lo8(SRAM_ADDITIONAL)
ld t0, Y+
ld t1, Y+
bst t0, 0
ror t1
ror t0
bld t1, 7
ld x0, X+
eor x0, t0
st Z+, x0
dec cnt0
breq XOR_to_Cipher_END
ld x0, X+
eor x0, t1
st Z+, x0
XOR_to_Cipher_END:
tst ed
brne XOR_to_Cipher_dec
XOR_to_Cipher_enc:
movw XL, addr0
cp t2, rate
brsh XOR_to_Cipher_XOR_to_State
mov cnt0, t2
rcall PAD_OneZero
rjmp XOR_to_Cipher_XOR_to_State
XOR_to_Cipher_dec:
movw XL, addr2
cp t2, rate
brsh XOR_to_Cipher_XOR_to_State
; need to be padded
mov cnt0, t2
rcall PAD_OneZero
XOR_to_Cipher_XOR_to_State:
rcall XOR_to_State
clr rmp
add addr0, t2
adc addr1, rmp
add addr2, t2
adc addr3, rmp
ret
.ENDIF
ENC:
lds ZH, SRAM_MESSAGE_OUT_ADDR
lds ZL, SRAM_MESSAGE_OUT_ADDR + 1
lds XH, SRAM_MESSAGE_IN_ADDR
lds XL, SRAM_MESSAGE_IN_ADDR + 1
movw addr0, XL
movw addr2, ZL
enc_block_loop:
rcall PHOTON_Permutation
rcall XOR_to_Cipher
cpi mclen, 1
brsh enc_block_loop
mov domain_cnt, domain_cnt1
rcall AddDomainCounter
ret
AUTH_AND_ENCDEC:
ldi YH, hi8(SRAM_STATE)
ldi YL, lo8(SRAM_STATE)
lds XH, SRAM_NONCE_ADDR
lds XL, SRAM_NONCE_ADDR + 1
rcall Load_Reorder_Store_128_bits
lds XH, SRAM_KEY_ADDR
lds XL, SRAM_KEY_ADDR + 1
rcall Load_Reorder_Store_128_bits
ldi domain_cnt0, 1
ldi domain_cnt1, 1
test_adlen_zero:
tst radlen
breq adlen_zero_test_mlen_zero
; radlen != 0
adlen_nzero_test_mlen_zero:
tst mclen
brne test_adlen_divisible
ldi domain_cnt0, 3
test_adlen_divisible:
mov rmp, radlen
andi rmp, RATE_INBYTES_MASK
breq hash_ad
inc domain_cnt0 ; 2 or 4
hash_ad:
lds XH, SRAM_ASSOCIATED_DATA_ADDR
lds XL, SRAM_ASSOCIATED_DATA_ADDR + 1
rcall HASH
tst mclen
breq mlen_zero_inputout_address
rjmp test_mlen_divisible
adlen_zero_test_mlen_zero:
ldi domain_cnt1, 5
tst mclen
breq adlen_zero_mlen_zero
; mclen != 0
test_mlen_divisible:
mov rmp, mclen
andi rmp, RATE_INBYTES_MASK
breq enc_dec_m
inc domain_cnt1 ; 2 or 6
enc_dec_m:
rcall ENC
rjmp AUTH_AND_ENCDEC_end
adlen_zero_mlen_zero:
; empty message and empty associated data
ldi YH, hi8(SRAM_STATE + STATE_INBYTES - 3)
ldi YL, lo8(SRAM_STATE + STATE_INBYTES - 3)
ld x0, Y
ldi rmp, 0x80
eor x0, rmp
st Y, x0
mlen_zero_inputout_address:
tst ed
brne dec_inputout_address
enc_inputout_address:
lds ZH, SRAM_MESSAGE_OUT_ADDR
lds ZL, SRAM_MESSAGE_OUT_ADDR + 1
movw addr2, ZL
rjmp AUTH_AND_ENCDEC_end
dec_inputout_address:
lds ZH, SRAM_MESSAGE_IN_ADDR
lds ZL, SRAM_MESSAGE_IN_ADDR + 1
movw addr0, ZL
AUTH_AND_ENCDEC_end:
ret
; void crypto_aead_encrypt_asm(
; unsigned char *c,
; const unsigned char *m,
; unsigned long long mlen,
; const unsigned char *ad,
; unsigned long long radlen,
; const unsigned char *npub,
; const unsigned char *k
; )
;
; unsigned char *c, is passed in r24:r25
; const unsigned char *m, is passed in r22:r23
; unsigned long long mlen, is passed in r20:r21, only LSB (r20) is used
; const unsigned char *ad, is passed in r18:r19
; unsigned long long radlen, is passed in r16:r17, only LSB (r16) is used
; const unsigned char *npub, is passed in r14:r15
; const unsigned char *k is passed in r12:r13
.global crypto_aead_encrypt_asm
crypto_aead_encrypt_asm:
PUSH_ALL
ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
st X+, r25 ;store cipher address in SRAM_MESSAGE_OUT_ADDR
st X+, r24
st X+, r23 ;store message address in SRAM_MESSAGE_IN_ADDR
st X+, r22
st X+, r21 ;store message length in SRAM_MESSAGE_IN_LEN
st X+, r20
st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR
st X+, r18
st X+, r17 ;store associated data length in SRAM_ADLEN
st X+, r16
st X+, r15 ;store nonce address in SRAM_NONCE_ADDR
st X+, r14
st X+, r13 ;store key address in SRAM_KEY_ADDR
st X+, r12
mov mclen, r20
mov radlen, r16
ldi rate, RATE_INBYTES
clr ed
rcall AUTH_AND_ENCDEC
rcall TAG
POP_ALL
ret
; int crypto_aead_decrypt_asm(
; unsigned char *m,
; const unsigned char *c,
; unsigned long long clen,
; const unsigned char *ad,
; unsigned long long radlen,
; const unsigned char *npub,
; const unsigned char *k
; )
;
; unsigned char *m, is passed in r24:r25
; const unsigned char *c, is passed in r22:r23
; unsigned long long clen, is passed in r20:r21, only LSB (r20) is used
; const unsigned char *ad, is passed in r18:r19
; unsigned long long radlen, is passed in r16:r17, only LSB (r16) is used
; const unsigned char *npub, is passed in r14:r15
; const unsigned char *k is passed in r12:r13
.global crypto_aead_decrypt_asm
crypto_aead_decrypt_asm:
PUSH_ALL
ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
st X+, r25 ;store message address in SRAM_MESSAGE_OUT_ADDR
st X+, r24
st X+, r23 ;store cipher address in SRAM_MESSAGE_IN_ADDR
st X+, r22
st X+, r21 ;store cipher length in SRAM_MESSAGE_IN_LEN
st X+, r20
st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR
st X+, r18
st X+, r17 ;store associated data length in SRAM_ADLEN
st X+, r16
st X+, r15 ;store nonce address in SRAM_NONCE_ADDR
st X+, r14
st X+, r13 ;store key address in SRAM_KEY_ADDR
st X+, r12
mov mclen, r20
mov radlen, r16
ldi rate, RATE_INBYTES
clr ed
inc ed
rcall AUTH_AND_ENCDEC
ldi YH, hi8(SRAM_ADDITIONAL)
ldi YL, lo8(SRAM_ADDITIONAL)
movw addr2, YL
rcall TAG
sbiw YL, CRYPTO_ABYTES
movw XL, addr0
ldi cnt0, CRYPTO_ABYTES
compare_tag:
ld t0, Y+
ld x0, X+
cp t0, x0
brne return_tag_not_match
dec cnt0
brne compare_tag
rjmp return_tag_match
return_tag_not_match:
ldi r25, 0xFF
ldi r24, 0xFF
rjmp crypto_aead_decrypt_end
return_tag_match:
clr r25
clr r24
crypto_aead_decrypt_end:
POP_ALL
ret
; #ifdef CRYPTO_AEAD
#endif
#ifdef CRYPTO_HASH
; void crypto_hash_asm(
; unsigned char *out,
; const unsigned char *in,
; unsigned long long inlen
; )
;
; unsigned char *out, is passed in r24:r25
; const unsigned char *in, is passed in r22:r23
; unsigned long long inlen, is passed in r20:r21, only LSB (r20) is used
.global crypto_hash_asm
crypto_hash_asm:
PUSH_ALL
ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
st X+, r25 ;store digest address in SRAM_MESSAGE_OUT_ADDR
st X+, r24
st X+, r23 ;store input address in SRAM_MESSAGE_IN_ADDR
st X+, r22
st X+, r21 ;store input length in SRAM_MESSAGE_IN_LEN
st X+, r20
mov mclen, r20
; zero the second half of the state
ldi YH, hi8(SRAM_STATE + INITIAL_RATE_INBYTES)
ldi YL, lo8(SRAM_STATE + INITIAL_RATE_INBYTES)
clr rmp
ldi cnt1, (STATE_INBYTES - INITIAL_RATE_INBYTES)
zero_state:
st Y+, rmp
dec cnt1
brne zero_state
ldi domain_cnt0, 1
sbiw YL, (STATE_INBYTES - INITIAL_RATE_INBYTES)
lds XH, SRAM_MESSAGE_IN_ADDR
lds XL, SRAM_MESSAGE_IN_ADDR + 1
tst mclen
breq add_domain
test_mlen_initrate:
; mclen != 0
cpi mclen, INITIAL_RATE_INBYTES
brlo less_than_initial_rate
breq equal_to_initial_rate
more_than_initial_rate:
rcall Load_Reorder_Store_128_bits
ldi rate, HASH_RATE_INBYTES
mov radlen, mclen
subi radlen, INITIAL_RATE_INBYTES
mov rmp, radlen
andi rmp, HASH_RATE_INBYTES_MASK
breq hash_message
inc domain_cnt0
hash_message:
rcall HASH
rjmp gen_digest
equal_to_initial_rate:
inc domain_cnt0
rcall Load_Reorder_Store_128_bits
rjmp add_domain
less_than_initial_rate:
mov cnt0, mclen
ldi rate, INITIAL_RATE_INBYTES
rcall PAD_OneZero
ldi YH, hi8(SRAM_STATE)
ldi YL, lo8(SRAM_STATE)
rcall Load_Reorder_Store_128_bits
rjmp add_domain
add_domain:
mov domain_cnt, domain_cnt0
rcall AddDomainCounter
gen_digest:
lds XH, SRAM_MESSAGE_OUT_ADDR
lds XL, SRAM_MESSAGE_OUT_ADDR + 1
movw addr2, XL
rcall TAG
movw XL, addr2
adiw XL, SQUEEZE_RATE_INBYTES
movw addr2, XL
rcall TAG
POP_ALL
ret
#endif
;
; **********************************************
; *               PHOTON-Beetle                *
; *  Authenticated Encryption and Hash Family  *
; *                                            *
; * Assembly implementation for 8-bit AVR CPU  *
; *   Version 1.0 2020 by PHOTON-Beetle Team   *
; **********************************************
;
#define ROUND_N 12
#define DIM 8
.MACRO Store_OneRow
st X+, x0
st X+, x1
st X+, x2
st X+, x3
.ENDM
.MACRO ROTL_1 i0
bst \i0, 7
lsl \i0
bld \i0, 0
.ENDM
.MACRO ROTR_1 i0
bst \i0, 0
lsr \i0
bld \i0, 7
.ENDM
.MACRO ROTR_4 i0
swap \i0
.ENDM
ROTR_1_ROW:
ROTR_1 x0
ROTR_1 x1
ROTR_1 x2
ROTR_1 x3
ret
ROTL_1_ROW:
ROTL_1 x0
ROTL_1 x1
ROTL_1 x2
ROTL_1 x3
ret
ROTR_4_ROW:
ROTR_4 x0
ROTR_4 x1
ROTR_4 x2
ROTR_4 x3
ret
RoundFunction:
rjmp AddRC_Sbox_ShiftRow_Start
ShiftRow_routine_table:
rjmp ShiftRow_RecoverZ_NoLPM
rjmp ShiftRow_1
rjmp ShiftRow_2
rjmp ShiftRow_3
rjmp ShiftRow_4
rjmp ShiftRow_5
rjmp ShiftRow_6
rjmp ShiftRow_7
ShiftRow_1:
rcall ROTR_1_ROW
rjmp ShiftRow_RecoverZ_LPM
ShiftRow_2:
rcall ROTR_1_ROW
rcall ROTR_1_ROW
rjmp ShiftRow_RecoverZ_NoLPM
ShiftRow_3:
rcall ROTR_4_ROW
rcall ROTL_1_ROW
rjmp ShiftRow_RecoverZ_LPM
ShiftRow_4:
rcall ROTR_4_ROW
rjmp ShiftRow_RecoverZ_NoLPM
ShiftRow_5:
rcall ROTR_4_ROW
rcall ROTR_1_ROW
rjmp ShiftRow_RecoverZ_LPM
ShiftRow_6:
rcall ROTL_1_ROW
rcall ROTL_1_ROW
rjmp ShiftRow_RecoverZ_NoLPM
ShiftRow_7:
rcall ROTL_1_ROW
rjmp ShiftRow_RecoverZ_NoLPM
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Start AddRC_Sbox_ShiftRow
AddRC_Sbox_ShiftRow_Start:
clr t3
inc t3
ldi XL, lo8(SRAM_STATE)
ldi XH, hi8(SRAM_STATE)
ldi YL, pm_lo8(ShiftRow_routine_table)
ldi YH, pm_hi8(ShiftRow_routine_table)
ldi rmp, DIM
lpm t0, Z+ ; Load two nibbles of round constant for rows 0 and 1
AddRC_Sbox_ShiftRow_Loop:
; AddRC_TwoRows
ld x0, X+
ld x1, X+
ld x2, X+
ld x3, X+
sbiw XL, 4
ror t0
brcc next1
eor x0, t3
next1:
ror t0
brcc next2
eor x1, t3
next2:
ror t0
brcc next3
eor x2, t3
next3:
ror t0
brcc next4
eor x3, t3
next4:
; Sbox_TwoRows
eor x1, x2
mov t1, x2
and t1, x1
eor x3, t1
mov t1, x3
and x3, x1
eor x3, x2
mov t2, x3
eor x3, x0
com x3
mov x2, x3
or t2, x0
eor x0, t1
eor x1, x0
or x2, x1
eor x2, t1
eor x1, t2
eor x3, x1
movw cnt0, ZL
movw ZL, YL
ijmp
ShiftRow_RecoverZ_NoLPM:
movw ZL, cnt0
rjmp ShiftRow_STORE_ROW
ShiftRow_RecoverZ_LPM:
movw ZL, cnt0
lpm t0, Z+ ; Load two nibbles of round constant for rows 2i and 2i+1
ShiftRow_STORE_ROW:
Store_OneRow
adiw YL, 1
dec rmp
brne AddRC_Sbox_ShiftRow_Loop
;;;;;;;;;;;;;;;;;;;;;;;; MixColumn Subroutines
rjmp MC_Start
mul_routine_table:
rjmp mul2_GF16_0x13_xor
rjmp mul4_GF16_0x13_xor
rjmp mul2_GF16_0x13_xor
rjmp mulb_GF16_0x13_xor
rjmp mul2_GF16_0x13_xor
rjmp mul8_GF16_0x13_xor
rjmp mul5_GF16_0x13_xor
rjmp mul6_GF16_0x13_xor
; For all mulN_GF16_0x13_xor routines:
; Input
; MSB........LSB
; x0=@0: x1=@1: x2=@2: x3=@3
mul2_GF16_0x13_xor:
; # define mul2_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \
; x3 = XOR (x3 ,x0); \
; } while (0) ; /* Output : ( MSB ) x1 ,x2 ,x3 , x0 ( LSB ) */
eor t3, t0
eor x0, t0
eor x1, t3
eor x2, t2
eor x3, t1
rjmp MC_INC_CNT1
mul4_GF16_0x13_xor:
; # define mul4_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \
; x3 = XOR (x3 ,x0); x0 = XOR (x0 ,x1); \
; } while (0) ; /* Output : ( MSB ) x2 ,x3 ,x0 , x1 ( LSB ) */
eor t3, t0
eor t0, t1
eor x0, t1
eor x1, t0
eor x2, t3
eor x3, t2
rjmp MC_INC_CNT1
mul5_GF16_0x13_xor:
; # define mul5_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \
; x2 = XOR (x2 ,x0); x3 = XOR (x3 ,x1); \
; x1 = XOR (x1 ,x2); x0 = XOR (x0 ,x3); \
; } while (0) ; /* Output : ( MSB ) x2 ,x0 ,x1 , x3 ( LSB ) */
eor t2, t0
eor t3, t1
eor t1, t2
eor t0, t3
eor x0, t3
eor x1, t1
eor x2, t0
eor x3, t2
rjmp MC_INC_CNT1
mul6_GF16_0x13_xor:
; # define mul6_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \
; x3 = XOR (x3 ,x1); x1 = XOR (x1 ,x0); \
; x2 = XOR (x2 ,x1); x0 = XOR (x0 ,x2); \
; x2 = XOR (x2 ,x3); \
; } while (0) ; /* Output : ( MSB ) x0 ,x2 ,x3 , x1 ( LSB ) */
eor t3, t1
eor t1, t0
eor t2, t1
eor t0, t2
eor t2, t3
eor x0, t1
eor x1, t3
eor x2, t2
eor x3, t0
rjmp MC_STORE_ROW
mul8_GF16_0x13_xor:
; # define mul8_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \
; x3 = XOR (x3 ,x0); x0 = XOR (x0 ,x1); \
; x1 = XOR (x1 ,x2); \
; } while (0) ; /* Output : ( MSB ) x3 ,x0 ,x1 , x2 ( LSB ) */
eor t3, t0
eor t0, t1
eor t1, t2
eor x0, t2
eor x1, t1
eor x2, t0
eor x3, t3
rjmp MC_INC_CNT1
mulb_GF16_0x13_xor:
; # define mul11_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \
; x2 = XOR (x2 ,x0); x1 = XOR (x1 ,x3); \
; x0 = XOR (x0 ,x1); x3 = XOR (x3 ,x2); \
; } while (0) ; /* Output : ( MSB ) x1 ,x2 ,x0 , x3 ( LSB ) */
eor t2, t0
eor t1, t3
eor t0, t1
eor t3, t2
eor x0, t3
eor x1, t0
eor x2, t2
eor x3, t1
rjmp MC_INC_CNT1
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Start MixColumns
MC_Start:
movw addr4, ZL
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
movw YL, XL
clr cnt0
clr cnt1
A1:
mov cnt1, cnt0
clr x0
clr x1
clr x2
clr x3
ldi ZH, pm_hi8(mul_routine_table)
ldi ZL, pm_lo8(mul_routine_table)
MC_MUL_LOOP:
ld t3, X+
ld t2, X+
ld t1, X+
ld t0, X+
ijmp
MC_INC_CNT1:
inc cnt1
cpi cnt1, DIM
brne MC_MUL_NEXT
clr cnt1
movw XL, YL
MC_MUL_NEXT:
adiw ZL, 1
rjmp MC_MUL_LOOP
MC_STORE_ROW:
cpi cnt0, 0
brne MC_STORE_DIRECT
sbiw XL, STATE_INBYTES
MC_STORE_DIRECT:
Store_OneRow
inc cnt0
cpi cnt0, DIM
brne A1
movw ZL, addr4
ret
PHOTON_Permutation:
ldi ZH, hi8(RC)
ldi ZL, lo8(RC)
ldi cnt2, ROUND_N
round_loop_start:
rcall RoundFunction
dec cnt2
brne round_loop_start
ret
.section .text
RC:
.byte 0x01,0x62,0xFE,0x9D
.byte 0x23,0x40,0xDC,0xBF
.byte 0x67,0x04,0x98,0xFB
.byte 0xFE,0x9D,0x01,0x62
.byte 0xCD,0xAE,0x32,0x51
.byte 0xAB,0xC8,0x54,0x37
.byte 0x76,0x15,0x89,0xEA
.byte 0xDC,0xBF,0x23,0x40
.byte 0x89,0xEA,0x76,0x15
.byte 0x32,0x51,0xCD,0xAE
.byte 0x45,0x26,0xBA,0xD9
.byte 0xBA,0xD9,0x45,0x26
;
; **********************************************
; *               PHOTON-Beetle                *
; *  Authenticated Encryption and Hash Family  *
; *                                            *
; * Assembly implementation for 8-bit AVR CPU  *
; *   Version 1.0 2020 by PHOTON-Beetle Team   *
; **********************************************
;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Bitslice
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
.MACRO Reorder_8_bits i0, i1, i2, i3, i4
ror \i0
ror \i1
ror \i0
ror \i2
ror \i0
ror \i3
ror \i0
ror \i4
ror \i0
ror \i1
ror \i0
ror \i2
ror \i0
ror \i3
ror \i0
ror \i4
.ENDM
.MACRO InvReorder_8_bits i0, i1, i2, i3, i4
ror \i1
ror \i0
ror \i2
ror \i0
ror \i3
ror \i0
ror \i4
ror \i0
ror \i1
ror \i0
ror \i2
ror \i0
ror \i3
ror \i0
ror \i4
ror \i0
.ENDM
; require XH:XL be the address of the input
Load_Reorder_32_bits:
ldi cnt1, 4
reorder_8_bits_loop:
ld rmp, X+
Reorder_8_bits rmp, x0, x1, x2, x3
dec cnt1
brne reorder_8_bits_loop
ret
; require YH:YL be the address of the output
invReorder_Store_32_bits:
ldi cnt1, 4
invreorder_8_bits_loop:
InvReorder_8_bits rmp, x0, x1, x2, x3
st Y+, rmp
dec cnt1
brne invreorder_8_bits_loop
ret
; require XH:XL be the address of the input
; require YH:YL be the address of the output
Load_Reorder_Store_128_bits:
ldi cnt0, 4
reorder_32_bits_loop:
rcall Load_Reorder_32_bits
st Y+, x0
st Y+, x1
st Y+, x2
st Y+, x3
dec cnt0
brne reorder_32_bits_loop
ret
; require XH:XL be the address of the input
; require YH:YL be the address of the output
Load_invReorder_Store_128_bits:
ldi cnt0, 4
invreorder_32_bits_loop:
ld x0, X+
ld x1, X+
ld x2, X+
ld x3, X+
rcall invReorder_Store_32_bits
dec cnt0
brne invreorder_32_bits_loop
ret
.macro PUSH_ALL
push r2
push r3
push r4
push r5
push r6
push r7
push r8
push r9
push r10
push r11
push r12
push r13
push r14
push r15
push r16
push r17
push r28
push r29
.endm
.macro POP_ALL
pop r29
pop r28
pop r17
pop r16
pop r15
pop r14
pop r13
pop r12
pop r11
pop r10
pop r9
pop r8
pop r7
pop r6
pop r5
pop r4
pop r3
pop r2
clr r1
.endm
#ifdef __cplusplus
extern "C" {
#endif
int crypto_hash(
unsigned char *out,
const unsigned char *in,
unsigned long long inlen
);
#ifdef __cplusplus
}
#endif
;
; **********************************************
; *               PHOTON-Beetle                *
; *  Authenticated Encryption and Hash Family  *
; *                                            *
; * Assembly implementation for 8-bit AVR CPU  *
; *   Version 1.0 2020 by PHOTON-Beetle Team   *
; **********************************************
;
#include "api.h"
;#define CRYPTO_AEAD
#define CRYPTO_HASH
#define STATE_INBITS 256
#define STATE_INBYTES ((STATE_INBITS + 7) / 8)
#define RATE_INBITS 128
#define RATE_INBYTES ((RATE_INBITS + 7) / 8)
#define RATE_INBYTES_MASK (RATE_INBYTES - 1)
#define TAG_MATCH 0
#define TAG_UNMATCH -1
#define OTHER_FAILURES -2
; For CRYPTO_HASH
#define INITIAL_RATE_INBITS 128
#define INITIAL_RATE_INBYTES ((INITIAL_RATE_INBITS + 7) / 8)
#define HASH_RATE_INBITS 32
#define HASH_RATE_INBYTES ((HASH_RATE_INBITS + 7) / 8)
#define HASH_RATE_INBYTES_MASK (HASH_RATE_INBYTES - 1)
;
; ============================================
; R E G I S T E R D E F I N I T I O N S
; ============================================
;
; [Add all register names here, include info on
; all used registers without specific names]
; rmp: Multipurpose register
#define rmp r16
#define rate r17
#define mclen r18
#define radlen r19
#define adlen_org r0
#define cnt0 r20
#define cnt1 r21
#define cnt2 r22
#define SQUEEZE_RATE_INBITS 128
#define SQUEEZE_RATE_INBYTES ((SQUEEZE_RATE_INBITS + 7) / 8)
#define SQUEEZE_RATE_MASK (SQUEEZE_RATE_INBYTES - 1)
#define CAPACITY_INBITS (STATE_INBITS - RATE_INBITS)
#define CAPACITY_INBYTES ((CAPACITY_INBITS + 7) / 8)
; For CRYPTO_AEAD
#define KEY_INBITS (CRYPTO_KEYBYTES * 8)
#define KEY_INBYTES (CRYPTO_KEYBYTES)
#define NONCE_INBITS (CRYPTO_NPUBBYTES * 8)
#define NONCE_INBYTES (CRYPTO_NPUBBYTES)
#define TAG_INBITS (CRYPTO_ABYTES * 8)
#define TAG_INBYTES (CRYPTO_ABYTES)
#define t0 r8
#define t1 r9
#define t2 r10
#define t3 r11
#define x0 r12
#define x1 r13
#define x2 r14
#define x3 r15
#define ed r1
#define addr0 r2
#define addr1 r3
#define addr2 r4
#define addr3 r5
#define addr4 r6
#define addr5 r7
; domain_cnt overlaps with cnt0; it is only used temporarily, so it needs no backup
#define domain_cnt r20
#define domain_cnt0 r23
#define domain_cnt1 r24
.section .noinit
SRAM_STATE: .BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
.BYTE 0, 0, 0, 0, 0, 0, 0, 0
SRAM_MESSAGE_OUT_ADDR: .BYTE 0, 0
SRAM_MESSAGE_IN_ADDR: .BYTE 0, 0
SRAM_MESSAGE_IN_LEN: .BYTE 0, 0
#ifdef CRYPTO_AEAD
; For CRYPTO_AEAD
SRAM_ASSOCIATED_DATA_ADDR: .BYTE 0, 0
SRAM_ADLEN: .BYTE 0, 0
SRAM_NONCE_ADDR: .BYTE 0, 0
SRAM_KEY_ADDR: .BYTE 0, 0
SRAM_ADDITIONAL: .BYTE 0, 0, 0, 0
.BYTE 0, 0, 0, 0
.BYTE 0, 0, 0, 0
.BYTE 0, 0, 0, 0
#endif
; SRAM required in addition to that used for the API
SRAM_PAD: .BYTE 0, 0, 0, 0
#if ((defined(CRYPTO_AEAD) && (RATE_INBYTES > 4)) || defined(CRYPTO_HASH))
.BYTE 0, 0, 0, 0
.BYTE 0, 0, 0, 0
.BYTE 0, 0, 0, 0
#endif
.section .text
#include "assist.h"
#include "photon.h"
AddDomainCounter:
ldi YH, hi8(SRAM_STATE + STATE_INBYTES - 3)
ldi YL, lo8(SRAM_STATE + STATE_INBYTES - 3)
ldi rmp, 0x80
ldi cnt1, 3
check_domain_bit:
ror domain_cnt
brcc no_xor
ld x0, Y
eor x0, rmp
st Y, x0
no_xor:
adiw YL, 1
dec cnt1
brne check_domain_bit
ret
; require XH:XL be the address of the current associated data/message block
XOR_to_State:
ldi YH, hi8(SRAM_STATE)
ldi YL, lo8(SRAM_STATE)
mov cnt0, rate
dec cnt0
XOR_to_State_loop:
rcall Load_Reorder_32_bits
ld rmp, Y
eor rmp, x0
st Y+, rmp
ld rmp, Y
eor rmp, x1
st Y+, rmp
ld rmp, Y
eor rmp, x2
st Y+, rmp
ld rmp, Y
eor rmp, x3
st Y+, rmp
subi cnt0, 4
brsh XOR_to_State_loop
; XH:XL now point to the next associated data/message block (when this is not the last block)
ret
; require XH:XL to point to the source data to be padded
PAD_OneZero:
ldi YH, hi8(SRAM_PAD)
ldi YL, lo8(SRAM_PAD)
mov cnt1, rate
pad_copy:
ld rmp, X+
st Y+, rmp
dec cnt1
dec cnt0
brne pad_copy
pad_one:
ldi rmp, 1
st Y+, rmp
dec cnt1
breq pad_end
clr rmp
pad_zero:
st Y+, rmp
dec cnt1
brne pad_zero
pad_end:
ldi XH, hi8(SRAM_PAD)
ldi XL, lo8(SRAM_PAD)
; XH:XL now point to the last block that needs to be processed
ret
HASH:
movw addr0, XL
hash_block_loop:
rcall PHOTON_Permutation
movw XL, addr0
cp rate, radlen
brsh hash_last_block
rcall XOR_to_State
movw addr0, XL
sub radlen, rate
rjmp hash_block_loop
hash_last_block:
cp radlen, rate
breq hash_xor_domain
mov cnt0, radlen
rcall PAD_OneZero
hash_xor_domain:
clr radlen
rcall XOR_to_State
mov domain_cnt, domain_cnt0
rcall AddDomainCounter
ret
TAG:
rcall PHOTON_Permutation
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
movw YL, addr2
rcall Load_invReorder_Store_128_bits
ret
#ifdef CRYPTO_AEAD
.IF (RATE_INBITS == 128)
XOR_to_Cipher:
mov t2, rate
cp t2, mclen
brlo XOR_to_Cipher_Start
mov t2, mclen ; back up the real length of the remaining message
XOR_to_Cipher_Start:
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
ldi YH, hi8(SRAM_ADDITIONAL)
ldi YL, lo8(SRAM_ADDITIONAL)
rcall Load_invReorder_Store_128_bits ; copy the state to SRAM_ADDITIONAL, un-bitsliced
movw XL, addr0
movw ZL, addr2
; XOR Part 2
sbiw YL, (RATE_INBYTES>>1) ; point Y back to Part 2
ldi cnt0, (RATE_INBYTES>>1)
cp cnt0, mclen
brlo XOR_Part2_Store_Cipher_begin
mov cnt0, mclen
XOR_Part2_Store_Cipher_begin:
sub mclen, cnt0
XOR_Part2_Store_Cipher_loop:
ld t0, Y+
ld x0, X+
eor x0, t0
st Z+, x0
dec cnt0
brne XOR_Part2_Store_Cipher_loop
cpi mclen, 1
brlo XOR_to_Cipher_END
; XOR (Part 1 >>> 1)
ldi cnt0, (RATE_INBYTES>>1)
cp cnt0, mclen
brlo XOR_Part1_Store_Cipher_begin
mov cnt0, mclen
XOR_Part1_Store_Cipher_begin:
sub mclen, cnt0
ldi YH, hi8(SRAM_ADDITIONAL)
ldi YL, lo8(SRAM_ADDITIONAL)
ld t0, Y
bst t0, 0
adiw YL, (RATE_INBYTES>>1)-1
ld t0, Y
ror t0
bld t0, 7
st Y, t0
ldi cnt1, (RATE_INBYTES>>1)-1
ROR_part1_loop:
ld t0, -Y
ror t0
st Y, t0
dec cnt1
brne ROR_part1_loop
XOR_Part1_Store_Cipher_loop:
ld t0, Y+
ld x0, X+
eor x0, t0
st Z+, x0
dec cnt0
brne XOR_Part1_Store_Cipher_loop
XOR_to_Cipher_END:
tst ed
brne XOR_to_Cipher_dec
XOR_to_Cipher_enc:
movw XL, addr0
cp t2, rate
brsh XOR_to_Cipher_XOR_to_State
mov cnt0, t2
rcall PAD_OneZero
rjmp XOR_to_Cipher_XOR_to_State
XOR_to_Cipher_dec:
movw XL, addr2
cp t2, rate
brsh XOR_to_Cipher_XOR_to_State
; the last block is partial and needs padding
mov cnt0, t2
rcall PAD_OneZero
XOR_to_Cipher_XOR_to_State:
rcall XOR_to_State
clr rmp
add addr0, t2
adc addr1, rmp
add addr2, t2
adc addr3, rmp
ret
.ELSE
; RATE_INBITS == 32
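; same computation as the 128-bit variant for the 4-byte rate: Part 1 is
; only two bytes, so the one-bit rotation is done inline on t0/t1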
XOR_to_Cipher:
mov t2, rate
cp t2, mclen
brlo XOR_to_Cipher_Start
mov t2, mclen ; back up the real length of the remaining message
XOR_to_Cipher_Start:
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
ld x0, X+
ld x1, X+
ld x2, X+
ld x3, X+
ldi YH, hi8(SRAM_ADDITIONAL)
ldi YL, lo8(SRAM_ADDITIONAL)
rcall invReorder_Store_32_bits
movw XL, addr0
movw ZL, addr2
; XOR Part 2
sbiw YL, (RATE_INBYTES>>1) ; point Y back to Part 2
ldi cnt0, (RATE_INBYTES>>1)
cp cnt0, mclen
brlo XOR_Part2_Store_Cipher_begin
mov cnt0, mclen
XOR_Part2_Store_Cipher_begin:
sub mclen, cnt0
XOR_Part2_Store_Cipher_loop:
ld t0, Y+
ld x0, X+
eor x0, t0
st Z+, x0
dec cnt0
brne XOR_Part2_Store_Cipher_loop
cpi mclen, 1
brlo XOR_to_Cipher_END
; XOR (Part 1 >>> 1)
ldi cnt0, (RATE_INBYTES>>1)
cp cnt0, mclen
brlo XOR_Part1_Store_Cipher_begin
mov cnt0, mclen
XOR_Part1_Store_Cipher_begin:
sub mclen, cnt0
ldi YH, hi8(SRAM_ADDITIONAL)
ldi YL, lo8(SRAM_ADDITIONAL)
ld t0, Y+
ld t1, Y+
bst t0, 0
ror t1
ror t0
bld t1, 7
ld x0, X+
eor x0, t0
st Z+, x0
dec cnt0
breq XOR_to_Cipher_END
ld x0, X+
eor x0, t1
st Z+, x0
XOR_to_Cipher_END:
tst ed
brne XOR_to_Cipher_dec
XOR_to_Cipher_enc:
movw XL, addr0
cp t2, rate
brsh XOR_to_Cipher_XOR_to_State
mov cnt0, t2
rcall PAD_OneZero
rjmp XOR_to_Cipher_XOR_to_State
XOR_to_Cipher_dec:
movw XL, addr2
cp t2, rate
brsh XOR_to_Cipher_XOR_to_State
; the last block is partial and needs padding
mov cnt0, t2
rcall PAD_OneZero
XOR_to_Cipher_XOR_to_State:
rcall XOR_to_State
clr rmp
add addr0, t2
adc addr1, rmp
add addr2, t2
adc addr3, rmp
ret
.ENDIF
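; encrypt (or decrypt) mclen bytes: one permutation call followed by
; XOR_to_Cipher per rate-sized block until the input is exhausted, then
; XOR the domain constant from domain_cnt1 into the state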
ENC:
lds ZH, SRAM_MESSAGE_OUT_ADDR
lds ZL, SRAM_MESSAGE_OUT_ADDR + 1
lds XH, SRAM_MESSAGE_IN_ADDR
lds XL, SRAM_MESSAGE_IN_ADDR + 1
movw addr0, XL
movw addr2, ZL
enc_block_loop:
rcall PHOTON_Permutation
rcall XOR_to_Cipher
cpi mclen, 1
brsh enc_block_loop
mov domain_cnt, domain_cnt1
rcall AddDomainCounter
ret
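; initialize the state with the bitsliced nonce and key (N || K), pick the
; domain constants for the AD and message phases from the emptiness and
; rate-divisibility of each input, then absorb the AD and process the message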
AUTH_AND_ENCDEC:
ldi YH, hi8(SRAM_STATE)
ldi YL, lo8(SRAM_STATE)
lds XH, SRAM_NONCE_ADDR
lds XL, SRAM_NONCE_ADDR + 1
rcall Load_Reorder_Store_128_bits
lds XH, SRAM_KEY_ADDR
lds XL, SRAM_KEY_ADDR + 1
rcall Load_Reorder_Store_128_bits
ldi domain_cnt0, 1
ldi domain_cnt1, 1
test_adlen_zero:
tst radlen
breq adlen_zero_test_mlen_zero
; radlen != 0
adlen_nzero_test_mlen_zero:
tst mclen
brne test_adlen_divisible
ldi domain_cnt0, 3
test_adlen_divisible:
mov rmp, radlen
andi rmp, RATE_INBYTES_MASK
breq hash_ad
inc domain_cnt0 ; 2 or 4
hash_ad:
lds XH, SRAM_ASSOCIATED_DATA_ADDR
lds XL, SRAM_ASSOCIATED_DATA_ADDR + 1
rcall HASH
tst mclen
breq mlen_zero_inputout_address
rjmp test_mlen_divisible
adlen_zero_test_mlen_zero:
ldi domain_cnt1, 5
tst mclen
breq adlen_zero_mlen_zero
; mclen != 0
test_mlen_divisible:
mov rmp, mclen
andi rmp, RATE_INBYTES_MASK
breq enc_dec_m
inc domain_cnt1 ; 2 or 6
enc_dec_m:
rcall ENC
rjmp AUTH_AND_ENCDEC_end
adlen_zero_mlen_zero:
; empty message and empty associated data
ldi YH, hi8(SRAM_STATE + STATE_INBYTES - 3)
ldi YL, lo8(SRAM_STATE + STATE_INBYTES - 3)
ld x0, Y
ldi rmp, 0x80
eor x0, rmp
st Y, x0
mlen_zero_inputout_address:
tst ed
brne dec_inputout_address
enc_inputout_address:
lds ZH, SRAM_MESSAGE_OUT_ADDR
lds ZL, SRAM_MESSAGE_OUT_ADDR + 1
movw addr2, ZL
rjmp AUTH_AND_ENCDEC_end
dec_inputout_address:
lds ZH, SRAM_MESSAGE_IN_ADDR
lds ZL, SRAM_MESSAGE_IN_ADDR + 1
movw addr0, ZL
AUTH_AND_ENCDEC_end:
ret
; void crypto_aead_encrypt_asm(
; unsigned char *c,
; const unsigned char *m,
; unsigned long long mlen,
; const unsigned char *ad,
; unsigned long long radlen,
; const unsigned char *npub,
; const unsigned char *k
; )
;
; unsigned char *c, is passed in r24:r25
; const unsigned char *m, is passed in r22:r23
; unsigned long long mlen, is passed in r20:r21, only LSB (r20) is used
; const unsigned char *ad, is passed in r18:r19
; unsigned long long radlen, is passed in r16:r17, only LSB (r16) is used
; const unsigned char *npub, is passed in r14:r15
; const unsigned char *k is passed in r12:r13
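; the seven arguments are stashed contiguously into the SRAM_* block
; (starting at SRAM_MESSAGE_OUT_ADDR) so the X/Y/Z pointer registers
; stay free during processing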
.global crypto_aead_encrypt_asm
crypto_aead_encrypt_asm:
PUSH_ALL
ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
st X+, r25 ;store cipher address in SRAM_MESSAGE_OUT_ADDR
st X+, r24
st X+, r23 ;store message address in SRAM_MESSAGE_IN_ADDR
st X+, r22
st X+, r21 ;store message length in SRAM_MESSAGE_IN_LEN
st X+, r20
st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR
st X+, r18
st X+, r17 ;store associated data length in SRAM_ADLEN
st X+, r16
st X+, r15 ;store nonce address in SRAM_NONCE_ADDR
st X+, r14
st X+, r13 ;store key address in SRAM_KEY_ADDR
st X+, r12
mov mclen, r20
mov radlen, r16
ldi rate, RATE_INBYTES
clr ed
rcall AUTH_AND_ENCDEC
rcall TAG
POP_ALL
ret
; int crypto_aead_decrypt_asm(
; unsigned char *m,
; const unsigned char *c,
; unsigned long long clen,
; const unsigned char *ad,
; unsigned long long radlen,
; const unsigned char *npub,
; const unsigned char *k
; )
;
; unsigned char *m, is passed in r24:r25
; const unsigned char *c, is passed in r22:r23
; unsigned long long clen, is passed in r20:r21, only LSB (r20) is used
; const unsigned char *ad, is passed in r18:r19
; unsigned long long radlen, is passed in r16:r17, only LSB (r16) is used
; const unsigned char *npub, is passed in r14:r15
; const unsigned char *k is passed in r12:r13
.global crypto_aead_decrypt_asm
crypto_aead_decrypt_asm:
PUSH_ALL
ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
st X+, r25 ;store message address in SRAM_MESSAGE_OUT_ADDR
st X+, r24
st X+, r23 ;store cipher address in SRAM_MESSAGE_IN_ADDR
st X+, r22
st X+, r21 ;store cipher length in SRAM_MESSAGE_IN_LEN
st X+, r20
st X+, r19 ;store associated data address in SRAM_ASSOCIATED_DATA_ADDR
st X+, r18
st X+, r17 ;store associated data length in SRAM_ADLEN
st X+, r16
st X+, r15 ;store nonce address in SRAM_NONCE_ADDR
st X+, r14
st X+, r13 ;store key address in SRAM_KEY_ADDR
st X+, r12
mov mclen, r20
mov radlen, r16
ldi rate, RATE_INBYTES
clr ed
inc ed
rcall AUTH_AND_ENCDEC
ldi YH, hi8(SRAM_ADDITIONAL)
ldi YL, lo8(SRAM_ADDITIONAL)
movw addr2, YL
rcall TAG
sbiw YL, CRYPTO_ABYTES
movw XL, addr0
ldi cnt0, CRYPTO_ABYTES
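; compare the recomputed tag in SRAM_ADDITIONAL with the received tag
; byte by byte (note: exits early on the first mismatch)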
compare_tag:
ld t0, Y+
ld x0, X+
cp t0, x0
brne return_tag_not_match
dec cnt0
brne compare_tag
rjmp return_tag_match
return_tag_not_match:
ldi r25, 0xFF
ldi r24, 0xFF
rjmp crypto_aead_decrypt_end
return_tag_match:
clr r25
clr r24
crypto_aead_decrypt_end:
POP_ALL
ret
; #ifdef CRYPTO_AEAD
#endif
#ifdef CRYPTO_HASH
; void crypto_hash_asm(
; unsigned char *out,
; const unsigned char *in,
; unsigned long long inlen
; )
;
; unsigned char *out, is passed in r24:r25
; const unsigned char *in, is passed in r22:r23
; unsigned long long inlen, is passed in r20:r21, only LSB (r20) is used
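; hashing absorbs the first 16 input bytes at the full initial rate, the
; remainder at HASH_RATE_INBYTES per permutation call, and squeezes the
; 32-byte digest as two successive 16-byte TAG blocks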
.global crypto_hash_asm
crypto_hash_asm:
PUSH_ALL
ldi XH, hi8(SRAM_MESSAGE_OUT_ADDR)
ldi XL, lo8(SRAM_MESSAGE_OUT_ADDR)
st X+, r25 ;store digest address in SRAM_MESSAGE_OUT_ADDR
st X+, r24
st X+, r23 ;store message address in SRAM_MESSAGE_IN_ADDR
st X+, r22
st X+, r21 ;store message length in SRAM_MESSAGE_IN_LEN
st X+, r20
mov mclen, r20
; zero the second half of the state (everything beyond the initial rate)
ldi YH, hi8(SRAM_STATE + INITIAL_RATE_INBYTES)
ldi YL, lo8(SRAM_STATE + INITIAL_RATE_INBYTES)
clr rmp
ldi cnt1, (STATE_INBYTES - INITIAL_RATE_INBYTES)
zero_state:
st Y+, rmp
dec cnt1
brne zero_state
ldi domain_cnt0, 1
sbiw YL, STATE_INBYTES
lds XH, SRAM_MESSAGE_IN_ADDR
lds XL, SRAM_MESSAGE_IN_ADDR + 1
tst mclen
breq add_domain
test_mlen_initrate:
; mclen != 0
cpi mclen, INITIAL_RATE_INBYTES
brlo less_than_initial_rate
breq equal_to_initial_rate
more_than_initial_rate:
rcall Load_Reorder_Store_128_bits
ldi rate, HASH_RATE_INBYTES
mov radlen, mclen
subi radlen, INITIAL_RATE_INBYTES
mov rmp, radlen
andi rmp, HASH_RATE_INBYTES_MASK
breq hash_message
inc domain_cnt0
hash_message:
rcall HASH
rjmp gen_digest
equal_to_initial_rate:
inc domain_cnt0
rcall Load_Reorder_Store_128_bits
rjmp add_domain
less_than_initial_rate:
mov cnt0, mclen
ldi rate, INITIAL_RATE_INBYTES
rcall PAD_OneZero
ldi YH, hi8(SRAM_STATE)
ldi YL, lo8(SRAM_STATE)
rcall Load_Reorder_Store_128_bits
rjmp add_domain
add_domain:
mov domain_cnt, domain_cnt0
rcall AddDomainCounter
gen_digest:
lds XH, SRAM_MESSAGE_OUT_ADDR
lds XL, SRAM_MESSAGE_OUT_ADDR + 1
movw addr2, XL
rcall TAG
movw XL, addr2
adiw XL, SQUEEZE_RATE_INBYTES
movw addr2, XL
rcall TAG
POP_ALL
ret
#endif
#include <avr/io.h>
#include <avr/sfr_defs.h>
#include <stdlib.h>
#include <string.h>
#include "api.h"
#include "crypto_hash.h"
extern void crypto_hash_asm(
unsigned char *out,
const unsigned char *in,
unsigned char inlen
);
int crypto_hash(
unsigned char *out,
const unsigned char *in,
unsigned long long inlen
)
{
/*
...
... the code for the hash function implementation goes here
... generating a hash value out[0],out[1],...,out[CRYPTO_BYTES-1]
... from a message in[0],in[1],...,in[inlen-1]
...
... return 0;
*/
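/* note: the assembly back end takes an 8-bit length, so inlen is
   truncated to its least significant byte */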
crypto_hash_asm(out, in, inlen);
return 0;
}
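/*
 * Illustrative sketch only (not part of the original submission): a minimal
 * harness showing how the wrapper might be called. It assumes a 32-byte
 * digest (CRYPTO_BYTES of PHOTON-Beetle-Hash); the HASH_TEST_MAIN guard is
 * hypothetical and keeps the example out of normal builds.
 */
#ifdef HASH_TEST_MAIN
int main(void)
{
    unsigned char digest[32];
    const unsigned char msg[3] = {0x00, 0x01, 0x02};
    crypto_hash(digest, msg, 3); /* lengths above 255 would be truncated */
    return (int) digest[0];      /* keep the result live */
}
#endif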
;
; **********************************************
; * PHOTON-Beetle *
; * Authenticated Encryption and Hash Family *
; * *
; * Assembly implementation for 8-bit AVR CPU *
; * Version 1.0 2020 by PHOTON-Beetle Team *
; **********************************************
;
#define ROUND_N 12
#define DIM 8
.MACRO Store_OneRow
st X+, x0
st X+, x1
st X+, x2
st X+, x3
.ENDM
.MACRO ROTL_1 i0
bst \i0, 7
lsl \i0
bld \i0, 0
.ENDM
.MACRO ROTR_1 i0
bst \i0, 0
lsr \i0
bld \i0, 7
.ENDM
.MACRO ROTR_4 i0
swap \i0
.ENDM
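; rows are stored as four bit-planes (one byte per plane, one bit per cell),
; so rotating a row by k cells is just rotating each plane byte by k bits;
; ROTR_4 is a plain nibble swap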
ROTR_1_ROW:
ROTR_1 x0
ROTR_1 x1
ROTR_1 x2
ROTR_1 x3
ret
ROTL_1_ROW:
ROTL_1 x0
ROTL_1 x1
ROTL_1 x2
ROTL_1 x3
ret
ROTR_4_ROW:
ROTR_4 x0
ROTR_4 x1
ROTR_4 x2
ROTR_4 x3
ret
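; one PHOTON round: AddConstant and SubCells (the PRESENT S-box, bitsliced)
; fused with ShiftRows, followed by the serial MixColumns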
RoundFunction:
rjmp AddRC_Sbox_ShiftRow_Start
ShiftRow_routine_table:
rjmp ShiftRow_RecoverZ_NoLPM
rjmp ShiftRow_1
rjmp ShiftRow_2
rjmp ShiftRow_3
rjmp ShiftRow_4
rjmp ShiftRow_5
rjmp ShiftRow_6
rjmp ShiftRow_7
ShiftRow_1:
rcall ROTR_1_ROW
rjmp ShiftRow_RecoverZ_LPM
ShiftRow_2:
rcall ROTR_1_ROW
rcall ROTR_1_ROW
rjmp ShiftRow_RecoverZ_NoLPM
ShiftRow_3:
rcall ROTR_4_ROW
rcall ROTL_1_ROW
rjmp ShiftRow_RecoverZ_LPM
ShiftRow_4:
rcall ROTR_4_ROW
rjmp ShiftRow_RecoverZ_NoLPM
ShiftRow_5:
rcall ROTR_4_ROW
rcall ROTR_1_ROW
rjmp ShiftRow_RecoverZ_LPM
ShiftRow_6:
rcall ROTL_1_ROW
rcall ROTL_1_ROW
rjmp ShiftRow_RecoverZ_NoLPM
ShiftRow_7:
rcall ROTL_1_ROW
rjmp ShiftRow_RecoverZ_NoLPM
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Start AddRC_Sbox_ShiftRow
AddRC_Sbox_ShiftRow_Start:
clr t3
inc t3
ldi XL, lo8(SRAM_STATE)
ldi XH, hi8(SRAM_STATE)
ldi YL, pm_lo8(ShiftRow_routine_table)
ldi YH, pm_hi8(ShiftRow_routine_table)
ldi rmp, DIM
lpm t0, Z+ ; Load two nibbles of round constant for row 0, 1
AddRC_Sbox_ShiftRow_Loop:
; AddRC_TwoRows
ld x0, X+
ld x1, X+
ld x2, X+
ld x3, X+
sbiw XL, 4
ror t0
brcc next1
eor x0, t3
next1:
ror t0
brcc next2
eor x1, t3
next2:
ror t0
brcc next3
eor x2, t3
next3:
ror t0
brcc next4
eor x3, t3
next4:
; Sbox_TwoRows
eor x1, x2
mov t1, x2
and t1, x1
eor x3, t1
mov t1, x3
and x3, x1
eor x3, x2
mov t2, x3
eor x3, x0
com x3
mov x2, x3
or t2, x0
eor x0, t1
eor x1, x0
or x2, x1
eor x2, t1
eor x1, t2
eor x3, x1
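; save the RC pointer Z in cnt1:cnt0, then dispatch through the
; ShiftRow routine table via Y (one rjmp word per row)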
movw cnt0, ZL
movw ZL, YL
ijmp
ShiftRow_RecoverZ_NoLPM:
movw ZL, cnt0
rjmp ShiftRow_STORE_ROW
ShiftRow_RecoverZ_LPM:
movw ZL, cnt0
lpm t0, Z+ ; Load two nibbles of round constant for row 2i, 2i+1
ShiftRow_STORE_ROW:
Store_OneRow
adiw YL, 1
dec rmp
brne AddRC_Sbox_ShiftRow_Loop
;;;;;;;;;;;;;;;;;;;;;;;; MixColumn Subroutines
rjmp MC_Start
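; coefficient sequence (2, 4, 2, 11, 2, 8, 5, 6) of the PHOTON serial
; MixColumns matrix over GF(2^4) with reduction polynomial x^4 + x + 1 (0x13)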
mul_routine_table:
rjmp mul2_GF16_0x13_xor
rjmp mul4_GF16_0x13_xor
rjmp mul2_GF16_0x13_xor
rjmp mulb_GF16_0x13_xor
rjmp mul2_GF16_0x13_xor
rjmp mul8_GF16_0x13_xor
rjmp mul5_GF16_0x13_xor
rjmp mul6_GF16_0x13_xor
; For all mul*_GF16_0x13_xor routines:
; Input
; MSB........LSB
; x0=@0: x1=@1: x2=@2: x3=@3
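; each tX/xX register holds one bit-plane of eight GF(2^4) cells, so one
; constant multiplication below scales all eight cells of a row at once;
; the planes are scaled in t0..t3 and XOR-accumulated into x0..x3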
mul2_GF16_0x13_xor:
; # define mul2_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \
; x3 = XOR (x3 ,x0); \
; } while (0) ; /* Output : ( MSB ) x1 ,x2 ,x3 , x0 ( LSB ) */
eor t3, t0
eor x0, t0
eor x1, t3
eor x2, t2
eor x3, t1
rjmp MC_INC_CNT1
mul4_GF16_0x13_xor:
; # define mul4_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \
; x3 = XOR (x3 ,x0); x0 = XOR (x0 ,x1); \
; } while (0) ; /* Output : ( MSB ) x2 ,x3 ,x0 , x1 ( LSB ) */
eor t3, t0
eor t0, t1
eor x0, t1
eor x1, t0
eor x2, t3
eor x3, t2
rjmp MC_INC_CNT1
mul5_GF16_0x13_xor:
; # define mul5_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \
; x2 = XOR (x2 ,x0); x3 = XOR (x3 ,x1); \
; x1 = XOR (x1 ,x2); x0 = XOR (x0 ,x3); \
; } while (0) ; /* Output : ( MSB ) x2 ,x0 ,x1 , x3 ( LSB ) */
eor t2, t0
eor t3, t1
eor t1, t2
eor t0, t3
eor x0, t3
eor x1, t1
eor x2, t0
eor x3, t2
rjmp MC_INC_CNT1
mul6_GF16_0x13_xor:
; # define mul6_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \
; x3 = XOR (x3 ,x1); x1 = XOR (x1 ,x0); \
; x2 = XOR (x2 ,x1); x0 = XOR (x0 ,x2); \
; x2 = XOR (x2 ,x3); \
; } while (0) ; /* Output : ( MSB ) x0 ,x2 ,x3 , x1 ( LSB ) */
eor t3, t1
eor t1, t0
eor t2, t1
eor t0, t2
eor t2, t3
eor x0, t1
eor x1, t3
eor x2, t2
eor x3, t0
rjmp MC_STORE_ROW
mul8_GF16_0x13_xor:
; # define mul8_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \
; x3 = XOR (x3 ,x0); x0 = XOR (x0 ,x1); \
; x1 = XOR (x1 ,x2); \
; } while (0) ; /* Output : ( MSB ) x3 ,x0 ,x1 , x2 ( LSB ) */
eor t3, t0
eor t0, t1
eor t1, t2
eor x0, t2
eor x1, t1
eor x2, t0
eor x3, t3
rjmp MC_INC_CNT1
mulb_GF16_0x13_xor:
; # define mul11_GF16_0x13 (x0 ,x1 ,x2 ,x3) do { \
; x2 = XOR (x2 ,x0); x1 = XOR (x1 ,x3); \
; x0 = XOR (x0 ,x1); x3 = XOR (x3 ,x2); \
; } while (0) ; /* Output : ( MSB ) x1 ,x2 ,x0 , x3 ( LSB ) */
eor t2, t0
eor t1, t3
eor t0, t1
eor t3, t2
eor x0, t3
eor x1, t0
eor x2, t2
eor x3, t1
rjmp MC_INC_CNT1
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Start MixColumns
MC_Start:
movw addr4, ZL
ldi XH, hi8(SRAM_STATE)
ldi XL, lo8(SRAM_STATE)
movw YL, XL
clr cnt0
clr cnt1
A1:
mov cnt1, cnt0
clr x0
clr x1
clr x2
clr x3
ldi ZH, pm_hi8(mul_routine_table)
ldi ZL, pm_lo8(mul_routine_table)
MC_MUL_LOOP:
ld t3, X+
ld t2, X+
ld t1, X+
ld t0, X+
ijmp
MC_INC_CNT1:
inc cnt1
cpi cnt1, DIM
brne MC_MUL_NEXT
clr cnt1
movw XL, YL
MC_MUL_NEXT:
adiw ZL, 1
rjmp MC_MUL_LOOP
MC_STORE_ROW:
cpi cnt0, 0
brne MC_STORE_DIRECT
sbiw XL, STATE_INBYTES
MC_STORE_DIRECT:
Store_OneRow
inc cnt0
cpi cnt0, DIM
brne A1
movw ZL, addr4
ret
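; apply ROUND_N (12) rounds; Z walks the RC table as the rounds consume
; their constants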
PHOTON_Permutation:
ldi ZH, hi8(RC)
ldi ZL, lo8(RC)
ldi cnt2, ROUND_N
round_loop_start:
rcall RoundFunction
dec cnt2
brne round_loop_start
ret
.section .text
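; 12 rounds x 4 bytes of packed round constants; each byte carries the
; four constant bits for two consecutive rows, shifted out bit by bit in
; AddRC and XORed into cell 0 of the matching bit-plane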
RC:
.byte 0x01,0x62,0xFE,0x9D
.byte 0x23,0x40,0xDC,0xBF
.byte 0x67,0x04,0x98,0xFB
.byte 0xFE,0x9D,0x01,0x62
.byte 0xCD,0xAE,0x32,0x51
.byte 0xAB,0xC8,0x54,0x37
.byte 0x76,0x15,0x89,0xEA
.byte 0xDC,0xBF,0x23,0x40
.byte 0x89,0xEA,0x76,0x15
.byte 0x32,0x51,0xCD,0xAE
.byte 0x45,0x26,0xBA,0xD9
.byte 0xBA,0xD9,0x45,0x26