Commit 5da2389f by Ferdinand Bachmann Committed by Sebastian Renner

new ascon

parent 0f49c046
/* Bytes that encryption adds to the message length (see the C wrappers). */
#define CRYPTO_ABYTES 16
/* floop, floopend and abi_entry used further down presumably come from here. */
#include <xtensa/coreasm.h>
#include "api.h"
/* Register aliases: tN are scratch pairs, xN are the 64-bit state words; */
/* the h and l suffixes name the two 32-bit halves of each pair. */
#define t0h a4
#define t0l a5
#define x0h a6
#define x0l a7
#define x1h a8
#define x1l a9
#define x2h a10
#define x2l a11
#define x3h a12
#define x3l a13
#define x4h a14
#define x4l a15
/* The aliases below share registers with x2, x3 and x4, so those state */
/* words are kept in the stack frame while the aliases are in use. */
#define optr x2h
#define iptr x2l
#define ilen x3h
#define mode x3l
#define t1h x4h
#define t1l x4l
## +-----------+-----------+-----------+------------+-----------+
## | RATE 16 | RATE 8 | RATE 8 | RATE 8 | RATE 8 |
## | PA 12 | PA 12 | PA 12 | PA 12 | PA 12 |
## | PB 8 | PB 6 | PB 6 | PB 8 | PB 12 |
## | KEY 16 | KEY 16 | KEY 20 | | |
## +-----------+-----------+-----------+------------+-----------+
## 0 | bytes | bytes | bytes | bytes | bytes |
## 4 | | | \---- | \---- | \---- | \---- |
## 8 | | | optr | optr | optr | optr |
## 12 | \---- | iptr | iptr | iptr cur | iptr cur |
## 16 | state x2h | state x2h | state x2h | | |
## 20 | | x2l | | x2l | | x2l | state x2l | state x2l |
## 24 | | x3h | | x3h | | x3h | \---- x3h | \---- x3h |
## 28 | | x3l | \---- x3l | \---- x3l | | |
## 32 | | x4h | ilen | ilen | ilen cur | ilen cur |
## 36 | \---- x4l | mode cur | mode cur | olen | olen |
## 40 | key k0h | key k0h | key k1h | | |
## 44 | | k0l | | k0l | | k1l | lr | lr |
## 48 | | k1h | | k1h | | k2h +------------+-----------+
## 52 | \---- k1l | \---- k1l | | k2l |
## 56 | | | \---- k0h |
## 60 | optr cur | optr cur | optr cur |
## 64 | iptr cur | iptr cur | iptr cur |
## 68 | ilen cur | ilen cur | ilen cur |
## 72 | mode cur | lr2 | lr2 |
## 76 | optr | lr | lr |
## 80 | iptr +-----------+-----------+
## 84 | ilen | | |
## 88 | lr2 | | |
## 92 | lr +-----------+-----------+
## 96 +-----------+ kptr arg | kptr arg |
## 100 | | mode arg | mode arg |
## 104 | +-----------+-----------+
## 108 +-----------+
## 112 | kptr arg |
## 116 | mode arg |
## 120 +-----------+
## ASCON128a
## bytes processed per duplex block
#define RATE 16
## round count handed to the permutation in a3
#define PA_ROUNDS 12
## first round constant; the permutation loop subtracts 15 after every round
#define PA_START_ROUND 0xf0
#define PB_ROUNDS 8
#define PB_START_ROUND 0xb4
## high word of the initial x0: key bits, rate bits, PA and PB round counts,
## one byte each. CRYPTO_KEYBYTES is not defined in the visible lines and is
## presumably provided by api.h.
#define IVh (((8 * CRYPTO_KEYBYTES) << 24) | ((8 * RATE) << 16) | (PA_ROUNDS << 8) | (PB_ROUNDS << 0))
#define IVl 0
## stack frame offsets relative to a1 (see the frame diagram above)
## S_state holds x2h .. x4l, S_key holds the byte-swapped key words
#define S_state 16
#define S_key 40
## working copies used by ascon_duplex around the permutation calls
#define S_optr_cur 60
#define S_iptr_cur 64
#define S_ilen_cur 68
#define S_mode_cur 72
## message arguments saved on entry to ascon_core
#define S_optr 76
#define S_iptr 80
#define S_ilen 84
## return addresses: S_lr2 for ascon_duplex, S_lr for ascon_core
#define S_lr2 88
#define S_lr 92
## stack-passed arguments of ascon_core
#define S_kptr_arg 112
#define S_mode_arg 116
## sbox: bit-sliced Ascon S-box on one 32-bit half of the five state words.
## \x0 .. \x4 are the inputs; \t0, \t1, \t2 are scratch registers.
## Results are left in rotated positions, matching the linear-layer calls in
## ascon_permute_noload:
##   \x2 feeds the new x0, \x3 the new x1, \x4 the new x2,
##   \t0 feeds the new x3, \x1 the new x4.
## \x0 itself only receives an intermediate value and may be reused as scratch.
.macro sbox x0, x1, x2, x3, x4, t0, t1, t2
    xor     \t2, \x3, \x4           # t2 = x3 ^ x4
    xor     \t1, \x0, \x4           # t1 = x0 ^ x4
    movi    \t0, -1
    xor     \x4, \x4, \t0           # x4 = ~x4
    xor     \t0, \x1, \x2           # t0 = x1 ^ x2
    or      \x4, \x4, \x3
    xor     \x4, \x4, \t0           # x4 = (~x4 | x3) ^ t0
    xor     \x3, \x3, \x1
    or      \x3, \x3, \t0
    xor     \x3, \x3, \t1           # x3 = ((x3 ^ x1) | t0) ^ t1
    xor     \x2, \x2, \t1
    or      \x2, \x2, \x1
    xor     \x2, \x2, \t2           # x2 = ((x2 ^ t1) | x1) ^ t2
    or      \x0, \x0, \t2
    xor     \t0, \t0, \x0           # t0 = (x0 | t2) ^ t0
    and     \t1, \t1, \x1
    xor     \x1, \x1, \t1           # x1 = x1 & ~t1
    xor     \x1, \x1, \t2           # x1 = (x1 & ~t1) ^ t2
.endm
## linear: one word of the Ascon linear layer on a 64-bit value held in two
## 32-bit registers:  d = s ^ ror64(s, R0) ^ ror64(s, R1)
##   \dl, \dh    destination low/high (must differ from all source registers)
##   \sl, \sh    source low/high
##   \sl0, \sh0  source halves as used for the first rotation by \r0
##   \sl1, \sh1  source halves as used for the second rotation by \r1
##   \t0         scratch
## \r0 and \r1 are shift amounts below 32; for a 64-bit rotation of 32 or
## more the caller passes the halves swapped and the amount minus 32.
## Clobbers \sh, \t0 and SAR.
.macro linear dl, dh, sl, sh, sl0, sh0, r0, sl1, sh1, r1, t0
    ssai    \r0
    src     \dl, \sh0, \sl0         # low word of the first rotation
    src     \dh, \sl0, \sh0         # high word of the first rotation
    xor     \dl, \dl, \sl
    xor     \dh, \dh, \sh
    ssai    \r1
    src     \t0, \sh1, \sl1         # low word of the second rotation
    src     \sh, \sl1, \sh1         # high word, overwrites the source high half
    xor     \dl, \dl, \t0
    xor     \dh, \dh, \sh
.endm
.align 4
.globl ascon_permute
.type ascon_permute,@function
# ascon permutation
# x0, x1 in a6 .. a9; x2, x3, x4 are loaded from sp + S_state .. sp + S_state + 20
# start round constant in a2
# round count in a3
# temporaries in a3, a4, a5
# on return x0, x1 are in a6 .. a9 and x2, x3, x4 are in a10 .. a15 as well
# as stored back to the same stack slots
ascon_permute:
    l32i    x2h, a1, (S_state + 0)
    l32i    x2l, a1, (S_state + 4)
    l32i    x3h, a1, (S_state + 8)
    l32i    x3l, a1, (S_state + 12)
    l32i    x4h, a1, (S_state + 16)
    l32i    x4l, a1, (S_state + 20)
    # falls through into ascon_permute_noload
.align 4
.globl ascon_permute_noload
.type ascon_permute_noload,@function
# state in a6 .. a15
# start round constant in a2
# round count in a3
# temporaries in a3, a4, a5
ascon_permute_noload:
    # ESP32 zero-overhead looping
    # a3 is reused as scratch inside the loop body, so this relies on floop
    # expanding to the hardware loop instruction instead of counting in a3
    # NOTE(review): confirm the loop option is enabled for the target core
    floop   a3, Ploop
    # round constant
    xor     x2l, x2l, a2
    # s-box
    # results: x2 -> new x0, x3 -> new x1, x4 -> new x2, t0 -> new x3, x1 -> new x4
    sbox    x0l, x1l, x2l, x3l, x4l, t0l, t0h, a3
    sbox    x0h, x1h, x2h, x3h, x4h, t0h, x0l, a3
    # linear layer
    # every call writes the register pair that the previous call has just
    # consumed; rotations of 32 or more are passed with swapped halves
    linear  x0l, x0h, x2l, x2h, x2l, x2h, 19, x2l, x2h, 28, a3
    linear  x2l, x2h, x4l, x4h, x4l, x4h, 1, x4l, x4h, 6, a3
    linear  x4l, x4h, x1l, x1h, x1l, x1h, 7, x1h, x1l, 9, a3
    linear  x1l, x1h, x3l, x3h, x3h, x3l, 29, x3h, x3l, 7, a3
    linear  x3l, x3h, t0l, t0h, t0l, t0h, 10, t0l, t0h, 17, a3
    # condition
    # step to the next round constant
    addi    a2, a2, -15
    floopend a3, Ploop
    # write x2, x3, x4 back to the stack frame
    s32i    x2h, a1, (S_state + 0)
    s32i    x2l, a1, (S_state + 4)
    s32i    x3h, a1, (S_state + 8)
    s32i    x3l, a1, (S_state + 12)
    s32i    x4h, a1, (S_state + 16)
    s32i    x4l, a1, (S_state + 20)
    ret
.align 4
.globl ascon_rev8
.type ascon_rev8,@function
# ascon bytereverse one block
# reverses the byte order of each of the four 32-bit words independently
# arguments and results in a4, a5, a14, a15
# temporaries in a2; SAR is left set to 8
# per word [b3 b2 b1 b0]: srli gives [0 0 b3 b2], the first src gives
# [b2 b3 b2 b1], the second [b1 b2 b3 b2], the last one [b0 b1 b2 b3]
ascon_rev8:
    ssai    8
    srli    a2, t1h, 16
    src     a2, a2, t1h
    src     a2, a2, a2
    src     t1h, t1h, a2
    srli    a2, t1l, 16
    src     a2, a2, t1l
    src     a2, a2, a2
    src     t1l, t1l, a2
    srli    a2, t0h, 16
    src     a2, a2, t0h
    src     a2, a2, a2
    src     t0h, t0h, a2
    srli    a2, t0l, 16
    src     a2, a2, t0l
    src     a2, a2, a2
    src     t0l, t0l, a2
    ret
.align 4
.globl ascon_memcpy
.type ascon_memcpy,@function
# memcpy that preserves registers used by ascon
# dest in a2
# src in a3
# byte count in ilen (a12); a count of zero copies nothing
# temporaries in a4, a5; a2 and a3 end up advanced past the copied bytes
ascon_memcpy:
    movi    a4, 0
    j       .LMcond
.LMloop:
    l8ui    a5, a3, 0
    s8i     a5, a2, 0
    addi    a2, a2, 1
    addi    a3, a3, 1
    addi    a4, a4, 1
.LMcond:
    bltu    a4, ilen, .LMloop
    ret
.align 4
.globl ascon_duplex
.type ascon_duplex,@function
# ascon duplex: xor ilen bytes from iptr into the rate words x0, x1,
# permuting after every full RATE-byte block; the padded final block is
# xored in but not permuted
# optr, iptr, ilen, mode in a10 .. a13; x2, x3, x4 live in the stack frame
# mode as set up by ascon_core: 0 for associated data, otherwise the
# sign-extended mode argument (negative selects the branches that copy the
# input block back into the rate words)
# on exit optr and iptr have been advanced by the number of bytes processed
# the return address is parked in S_lr2 because of the nested call0s
# NOTE(review): the entry label, the return instruction and the branch
# targets .LDloop, .LDcond, .LDreset, .LDpermute, .LDendreset and .LDreturn
# are referenced but not present in the visible lines; their positions have
# to be confirmed before this assembles
# NOTE(review): the word loads and stores assume 4-byte aligned iptr and
# optr - confirm against the callers
    s32i    a0, a1, S_lr2
    j       .LDcond
    # full block: load RATE bytes and convert them to big-endian words
    l32i    t0h, iptr, 0
    l32i    t0l, iptr, 4
    l32i    t1h, iptr, 8
    l32i    t1l, iptr, 12
    call0   ascon_rev8
    # xor the block into the rate words
    xor     x0h, x0h, t0h
    xor     x0l, x0l, t0l
    xor     x1h, x1h, t1h
    xor     x1l, x1l, t1l
    beqz    a13, .LDreset           # a13 is mode
    # ascon_rev8
    # inlined here to preserve registers
    # the swapped words go to a2 only, x0 and x1 stay intact
    ssai    8
    srli    a2, x0h, 16
    src     a2, a2, x0h
    src     a2, a2, a2
    src     a2, x0h, a2
    s32i    a2, optr, 0
    srli    a2, x0l, 16
    src     a2, a2, x0l
    src     a2, a2, a2
    src     a2, x0l, a2
    s32i    a2, optr, 4
    srli    a2, x1h, 16
    src     a2, a2, x1h
    src     a2, a2, a2
    src     a2, x1h, a2
    s32i    a2, optr, 8
    srli    a2, x1l, 16
    src     a2, a2, x1l
    src     a2, a2, a2
    src     a2, x1l, a2
    s32i    a2, optr, 12
    bgez    mode, .LDpermute
    # negative mode: the rate words become the input block
    mov     x0h, t0h
    mov     x0l, t0l
    mov     x1h, t1h
    mov     x1l, t1l
    # optr, iptr, ilen share registers with x2h .. x3h: spill them
    s32i    optr, a1, S_optr_cur
    s32i    iptr, a1, S_iptr_cur
    s32i    ilen, a1, S_ilen_cur
    # ascon_permute takes its start round constant in a2, which held only
    # byte-swap scratch at this point
    movi    a2, PB_START_ROUND
    movi    a3, PB_ROUNDS
    call0   ascon_permute
    l32i    optr, a1, S_optr_cur
    l32i    iptr, a1, S_iptr_cur
    l32i    ilen, a1, S_ilen_cur
    l32i    mode, a1, S_mode_cur
    addi    optr, optr, RATE
    addi    iptr, iptr, RATE
    addi    ilen, ilen, -RATE
    bgeui   ilen, RATE, .LDloop
    # final partial block: clear the RATE-byte buffer at sp + 0, copy the
    # remaining ilen bytes into it and flip 0x80 in the byte after them
    movi    a2, 0
    s32i    a2, a1, 0
    s32i    a2, a1, 4
    s32i    a2, a1, 8
    s32i    a2, a1, 12
    mov     a2, a1
    mov     a3, iptr
    call0   ascon_memcpy
    movi    a4, 0x80
    add     a2, a1, ilen
    l8ui    a3, a2, 0
    xor     a3, a3, a4
    s8i     a3, a2, 0
    l32i    t0h, a1, 0
    l32i    t0l, a1, 4
    l32i    t1h, a1, 8
    l32i    t1l, a1, 12
    call0   ascon_rev8
    xor     x0h, x0h, t0h
    xor     x0l, x0l, t0l
    xor     x1h, x1h, t1h
    xor     x1l, x1l, t1l
    beqz    mode, .LDendreset
    # output: byte-swap the rate words into the buffer, copy ilen bytes out
    mov     t0h, x0h
    mov     t0l, x0l
    mov     t1h, x1h
    mov     t1l, x1l
    call0   ascon_rev8
    s32i    t0h, a1, 0
    s32i    t0l, a1, 4
    s32i    t1h, a1, 8
    s32i    t1l, a1, 12
    mov     a2, optr
    mov     a3, a1
    call0   ascon_memcpy
    bgez    mode, .LDreturn
    # negative mode: put the ilen input bytes back over the start of the
    # buffer and reload the rate words from it
    mov     a2, a1
    mov     a3, iptr
    call0   ascon_memcpy
    l32i    t0h, a1, 0
    l32i    t0l, a1, 4
    l32i    t1h, a1, 8
    l32i    t1l, a1, 12
    call0   ascon_rev8
    mov     x0h, t0h
    mov     x0l, t0l
    mov     x1h, t1h
    mov     x1l, t1l
    # step both pointers past the partial block
    add     optr, optr, ilen
    add     iptr, iptr, ilen
    l32i    a0, a1, S_lr2
.align 4
.globl ascon_core
.type ascon_core,@function
# ascon_core(outptr, inptr, inlen, adptr, adlen, nptr, kptr, mode)
# arguments in a2 .. a7, kptr and mode on the stack (S_kptr_arg, S_mode_arg)
# returns 0 in a2, or -1 when mode is negative and the tag does not match
# NOTE(review): S_kptr_arg is the frame size plus 16, which looks like the
# windowed ABI layout - confirm the intended ABI
ascon_core:
    abi_entry 96, 4
    s32i    a0, a1, S_lr
    s32i    a2, a1, S_optr
    s32i    a3, a1, S_iptr
    s32i    a4, a1, S_ilen
    # adptr and adlen go straight into the slots read before the first duplex call
    s32i    a5, a1, S_iptr_cur
    s32i    a6, a1, S_ilen_cur
    # load key
    l32i    a2, a1, S_kptr_arg
    l32i    t0h, a2, 0
    l32i    t0l, a2, 4
    l32i    t1h, a2, 8
    l32i    t1l, a2, 12
    call0   ascon_rev8
    s32i    t0h, a1, (S_key + 0)
    s32i    t0l, a1, (S_key + 4)
    s32i    t1h, a1, (S_key + 8)
    s32i    t1l, a1, (S_key + 12)
    mov     x1h, t0h
    mov     x1l, t0l
    mov     x2h, t1h
    mov     x2l, t1l
    # load nonce
    # a7 is not clobbered by ascon_rev8
    # a7 does not overlap x1, x2, t0, or t1
    # x4 overlaps t1, move unnecessary
    mov     a2, a7
    l32i    t0h, a2, 0
    l32i    t0l, a2, 4
    l32i    t1h, a2, 8
    l32i    t1l, a2, 12
    call0   ascon_rev8
    mov     x3h, t0h
    mov     x3l, t0l
    # load IV
    # this clobbers a7
    movi    x0h, IVh
    movi    x0l, IVl
    # a2 held byte-swap scratch; the permutation expects its start round
    # constant there
    movi    a2, PA_START_ROUND
    movi    a3, PA_ROUNDS
    call0   ascon_permute_noload
    # xor key
    # x4 overlaps t1, do in two steps
    l32i    t0h, a1, (S_key + 0)
    l32i    t0l, a1, (S_key + 4)
    xor     x3h, x3h, t0h
    xor     x3l, x3l, t0l
    l32i    t0h, a1, (S_key + 8)
    l32i    t0l, a1, (S_key + 12)
    xor     x4h, x4h, t0h
    xor     x4l, x4l, t0l
    # save state
    s32i    x2h, a1, (S_state + 0)
    s32i    x2l, a1, (S_state + 4)
    s32i    x3h, a1, (S_state + 8)
    s32i    x3l, a1, (S_state + 12)
    s32i    x4h, a1, (S_state + 16)
    s32i    x4l, a1, (S_state + 20)
    # associated data, skipped entirely when adlen is zero
    l32i    ilen, a1, S_ilen_cur
    beqz    ilen, .LCskipad
    l32i    iptr, a1, S_iptr_cur
    movi    mode, 0
    s32i    mode, a1, S_mode_cur
    call0   ascon_duplex
    movi    a2, PB_START_ROUND
    movi    a3, PB_ROUNDS
    call0   ascon_permute
.LCskipad:
    # flip the lowest state bit, in the register and in the stack copy
    movi    a2, 1
    xor     x4l, x4l, a2
    s32i    x4l, a1, (S_state + 20)
    # message: mode is the sign-extended low byte of the mode argument
    l32i    optr, a1, S_optr
    l32i    iptr, a1, S_iptr
    l32i    ilen, a1, S_ilen
    l8ui    mode, a1, S_mode_arg
    sext    mode, mode, 7
    s32i    mode, a1, S_mode_cur
    call0   ascon_duplex
    # optr and iptr now point just past the message; keep them for the tag
    s32i    optr, a1, S_optr_cur
    s32i    iptr, a1, S_iptr_cur
    # restore state
    l32i    x2h, a1, (S_state + 0)
    l32i    x2l, a1, (S_state + 4)
    l32i    x3h, a1, (S_state + 8)
    l32i    x3l, a1, (S_state + 12)
    l32i    x4h, a1, (S_state + 16)
    l32i    x4l, a1, (S_state + 20)
    # xor key
    # x4 overlaps t1, do in two steps
    l32i    t0h, a1, (S_key + 0)
    l32i    t0l, a1, (S_key + 4)
    xor     x2h, x2h, t0h
    xor     x2l, x2l, t0l
    l32i    t0h, a1, (S_key + 8)
    l32i    t0l, a1, (S_key + 12)
    xor     x3h, x3h, t0h
    xor     x3l, x3l, t0l
    movi    a2, PA_START_ROUND
    movi    a3, PA_ROUNDS
    call0   ascon_permute_noload
    # xor key
    # x4 overlaps t1, do in two steps
    # x3 and x4 now hold the tag
    l32i    t0h, a1, (S_key + 0)
    l32i    t0l, a1, (S_key + 4)
    xor     x3h, x3h, t0h
    xor     x3l, x3l, t0l
    l32i    t0h, a1, (S_key + 8)
    l32i    t0l, a1, (S_key + 12)
    xor     x4h, x4h, t0h
    xor     x4l, x4l, t0l
    l32i    a2, a1, S_mode_cur
    bgez    a2, .LCencrypt
    # save x4 into x0
    # x0 is no longer needed
    # x4 overlaps t1
    mov     x0h, x4h
    mov     x0l, x4l
    # read the 16 tag bytes that follow the input
    l32i    a2, a1, S_iptr_cur
    l32i    t0h, a2, 0
    l32i    t0l, a2, 4
    l32i    t1h, a2, 8
    l32i    t1l, a2, 12
    call0   ascon_rev8
    # check tag
    # x4 is in x0
    # xor the four word differences together as computed below
    xor     a2, x3h, t0h
    xor     a3, x3l, t0l
    xor     a2, a2, a3
    xor     a3, x0h, t1h
    xor     a2, a2, a3
    xor     a3, x0l, t1l
    xor     a2, a2, a3
    beqz    a2, .LCzeroreturn
    movi    a2, -1
    j       .LCreturn
.LCencrypt:
    # store tag
    # x4 overlaps t1, move unnecessary
    mov     t0h, x3h
    mov     t0l, x3l
    call0   ascon_rev8
    l32i    a2, a1, S_optr_cur
    s32i    t0h, a2, 0
    s32i    t0l, a2, 4
    s32i    t1h, a2, 8
    s32i    t1l, a2, 12
.LCzeroreturn:
    movi    a2, 0
.LCreturn:
    l32i    a0, a1, S_lr
    abi_return
#include "api.h"
/*
 * Assembly core shared by crypto_aead_encrypt and crypto_aead_decrypt.
 * The visible wrappers pass mode 1 to encrypt and -1 (255 once converted to
 * unsigned char) to decrypt, and return this function's result unchanged.
 * Lengths are unsigned int here, while the wrappers receive unsigned long
 * long lengths and convert them implicitly.
 */
int ascon_core(unsigned char * outptr,
const unsigned char * inptr, unsigned int inlen,
const unsigned char * adptr, unsigned int adlen,
const unsigned char * nptr, const unsigned char * kptr,
unsigned char mode);
#include "ascon.h"

/*
 * Decrypt and verify: c holds clen bytes, the last CRYPTO_ABYTES of which
 * are the tag. Writes the plaintext to m and its length to *mlen.
 * Returns -1 (with *mlen = 0) when clen is too short to contain a tag,
 * otherwise whatever ascon_core returns. nsec is unused.
 */
int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen,
                        unsigned char* nsec, const unsigned char* c,
                        unsigned long long clen, const unsigned char* ad,
                        unsigned long long adlen, const unsigned char* npub,
                        const unsigned char* k) {
  (void)nsec;
  if (clen < CRYPTO_ABYTES) {
    *mlen = 0;
    return -1;
  }
  *mlen = clen - CRYPTO_ABYTES;
  /* mode -1 selects decryption in ascon_core */
  return ascon_core(m, c, *mlen, ad, adlen, npub, k, -1);
}
#include "ascon.h"

/*
 * Encrypt mlen bytes from m into c and report the output length
 * (mlen + CRYPTO_ABYTES) in *clen. Returns whatever ascon_core returns.
 * nsec is unused.
 */
int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen,
                        const unsigned char* m, unsigned long long mlen,
                        const unsigned char* ad, unsigned long long adlen,
                        const unsigned char* nsec, const unsigned char* npub,
                        const unsigned char* k) {
  (void)nsec;
  *clen = mlen + CRYPTO_ABYTES;
  /* mode 1 selects encryption in ascon_core */
  return ascon_core(c, m, mlen, ad, adlen, npub, k, 1);
}
/* Bytes that encryption adds to the message length (see the C wrappers). */
#define CRYPTO_ABYTES 16
/* floop, floopend and abi_entry used further down presumably come from here. */
#include <xtensa/coreasm.h>
#include "api.h"
/* Register aliases: tN are scratch pairs, xN are the 64-bit state words; */
/* the h and l suffixes name the two 32-bit halves of each pair. */
#define t0h a4
#define t0l a5
#define x0h a6
#define x0l a7
#define x1h a8
#define x1l a9
#define x2h a10
#define x2l a11
#define x3h a12
#define x3l a13
#define x4h a14
#define x4l a15
/* The aliases below share registers with x2, x3 and x4; x2 and x3 are kept */
/* in the stack frame while optr, iptr, ilen and mode are in use. */
#define optr x2h
#define iptr x2l
#define ilen x3h
#define mode x3l
#define t1h x4h
#define t1l x4l
## +-----------+-----------+-----------+------------+-----------+
## | RATE 16 | RATE 8 | RATE 8 | RATE 8 | RATE 8 |
## | PA 12 | PA 12 | PA 12 | PA 12 | PA 12 |
## | PB 8 | PB 6 | PB 6 | PB 8 | PB 12 |
## | KEY 16 | KEY 16 | KEY 20 | | |
## +-----------+-----------+-----------+------------+-----------+
## 0 | bytes | bytes | bytes | bytes | bytes |
## 4 | | | \---- | \---- | \---- | \---- |
## 8 | | | optr | optr | optr | optr |
## 12 | \---- | iptr | iptr | iptr cur | iptr cur |
## 16 | state x2h | state x2h | state x2h | | |
## 20 | | x2l | | x2l | | x2l | state x2l | state x2l |
## 24 | | x3h | | x3h | | x3h | \---- x3h | \---- x3h |
## 28 | | x3l | \---- x3l | \---- x3l | | |
## 32 | | x4h | ilen | ilen | ilen cur | ilen cur |
## 36 | \---- x4l | mode cur | mode cur | olen | olen |
## 40 | key k0h | key k0h | key k1h | | |
## 44 | | k0l | | k0l | | k1l | lr | lr |
## 48 | | k1h | | k1h | | k2h +------------+-----------+
## 52 | \---- k1l | \---- k1l | | k2l |
## 56 | | | \---- k0h |
## 60 | optr cur | optr cur | optr cur |
## 64 | iptr cur | iptr cur | iptr cur |
## 68 | ilen cur | ilen cur | ilen cur |
## 72 | mode cur | lr2 | lr2 |
## 76 | optr | lr | lr |
## 80 | iptr +-----------+-----------+
## 84 | ilen | | |
## 88 | lr2 | | |
## 92 | lr +-----------+-----------+
## 96 +-----------+ kptr arg | kptr arg |
## 100 | | mode arg | mode arg |
## 104 | +-----------+-----------+
## 108 +-----------+
## 112 | kptr arg |
## 116 | mode arg |
## 120 +-----------+
## ASCON128
## bytes processed per duplex block
#define RATE 8
## round count handed to the permutation in a3
#define PA_ROUNDS 12
## first round constant; the permutation loop subtracts 15 after every round
#define PA_START_ROUND 0xf0
#define PB_ROUNDS 6
#define PB_START_ROUND 0x96
## high word of the initial x0: key bits, rate bits, PA and PB round counts,
## one byte each. CRYPTO_KEYBYTES is not defined in the visible lines and is
## presumably provided by api.h.
#define IVh (((8 * CRYPTO_KEYBYTES) << 24) | ((8 * RATE) << 16) | (PA_ROUNDS << 8) | (PB_ROUNDS << 0))
#define IVl 0
## stack frame offsets relative to a1 (see the frame diagram above)
## S_state holds x2h .. x3l, S_key holds the byte-swapped key words
#define S_state 16
#define S_key 40
## working copies used by ascon_duplex around the permutation calls
#define S_optr_cur 60
#define S_iptr_cur 64
#define S_ilen_cur 68
#define S_mode_cur 36
## message arguments saved on entry to ascon_core
#define S_optr 8
#define S_iptr 12
#define S_ilen 32
## return addresses: S_lr2 for ascon_duplex, S_lr for ascon_core
#define S_lr2 72
#define S_lr 76
## stack-passed arguments of ascon_core
#define S_kptr_arg 96
#define S_mode_arg 100
## sbox: bit-sliced Ascon S-box on one 32-bit half of the five state words.
## \x0 .. \x4 are the inputs; \t0, \t1, \t2 are scratch registers.
## Results are left in rotated positions, matching the linear-layer calls in
## ascon_permute_noload:
##   \x2 feeds the new x0, \x3 the new x1, \x4 the new x2,
##   \r0 feeds the new x3, \x1 the new x4.
## \r0 is written by the last instruction only, so it may name the same
## register as \t0 or \t1 (the callers do this).
.macro sbox x0, x1, x2, x3, x4, r0, t0, t1, t2
    xor     \t1, \x0, \x4           # t1 = x0 ^ x4
    xor     \t2, \x3, \x4           # t2 = x3 ^ x4
    movi    \t0, -1
    xor     \x4, \x4, \t0           # x4 = ~x4
    xor     \t0, \x1, \x2           # t0 = x1 ^ x2
    or      \x4, \x4, \x3
    xor     \x4, \x4, \t0           # x4 = (~x4 | x3) ^ t0
    xor     \x3, \x3, \x1
    or      \x3, \x3, \t0
    xor     \x3, \x3, \t1           # x3 = ((x3 ^ x1) | t0) ^ t1
    xor     \x2, \x2, \t1
    or      \x2, \x2, \x1
    xor     \x2, \x2, \t2           # x2 = ((x2 ^ t1) | x1) ^ t2
    or      \x0, \x0, \t2
    xor     \x0, \x0, \t0           # x0 = (x0 | t2) ^ t0
    movi    \t0, -1
    xor     \t1, \t1, \t0           # t1 = ~t1
    and     \x1, \x1, \t1
    xor     \x1, \x1, \t2           # x1 = (x1 & ~t1) ^ t2
    mov     \r0, \x0                # hand the x0 result over in r0
.endm
## linear: one word of the Ascon linear layer on a 64-bit value held in two
## 32-bit registers:  d = s ^ ror64(s, R0) ^ ror64(s, R1)
##   \dl, \dh    destination low/high (must differ from all source registers)
##   \sl, \sh    source low/high
##   \sl0, \sh0  source halves as used for the first rotation by \r0
##   \sl1, \sh1  source halves as used for the second rotation by \r1
##   \t0         scratch
## \r0 and \r1 are shift amounts below 32; for a 64-bit rotation of 32 or
## more the caller passes the halves swapped and the amount minus 32.
## Clobbers \sh, \t0 and SAR.
.macro linear dl, dh, sl, sh, sl0, sh0, r0, sl1, sh1, r1, t0
    ssai    \r0
    src     \dl, \sh0, \sl0         # low word of the first rotation
    src     \dh, \sl0, \sh0         # high word of the first rotation
    xor     \dl, \dl, \sl
    xor     \dh, \dh, \sh
    ssai    \r1
    src     \t0, \sh1, \sl1         # low word of the second rotation
    src     \sh, \sl1, \sh1         # high word, overwrites the source high half
    xor     \dl, \dl, \t0
    xor     \dh, \dh, \sh
.endm
.align 4
.globl ascon_permute
.type ascon_permute,@function
# ascon permutation
# x0, x1 in a6 .. a9, x4 in a14, a15; x2, x3 are loaded from
# sp + S_state .. sp + S_state + 12
# start round constant in a2
# round count in a3
# temporaries in a3, a4, a5
# on return the whole state is in a6 .. a15 and x2, x3 are also stored
# back to the same stack slots
ascon_permute:
    l32i    x2h, a1, (S_state + 0)
    l32i    x2l, a1, (S_state + 4)
    l32i    x3h, a1, (S_state + 8)
    l32i    x3l, a1, (S_state + 12)
    # falls through into ascon_permute_noload
.globl ascon_permute_noload
.type ascon_permute_noload,@function
# state in a6 .. a15
# start round constant in a2
# round count in a3
# temporaries in a3, a4, a5
ascon_permute_noload:
    # ESP32 zero-overhead looping
    # a3 is reused as scratch inside the loop body, so this relies on floop
    # expanding to the hardware loop instruction instead of counting in a3
    # NOTE(review): confirm the loop option is enabled for the target core
    floop   a3, Ploop
    # round constant
    xor     x2l, x2l, a2
    # s-box
    # results: x2 -> new x0, x3 -> new x1, x4 -> new x2, t0 -> new x3, x1 -> new x4
    sbox    x0l, x1l, x2l, x3l, x4l, t0l, t0h, t0l, a3
    sbox    x0h, x1h, x2h, x3h, x4h, t0h, t0h, x0l, a3
    # linear layer
    # every call writes the register pair that the previous call has just
    # consumed; rotations of 32 or more are passed with swapped halves
    linear  x0l, x0h, x2l, x2h, x2l, x2h, 19, x2l, x2h, 28, a3
    linear  x2l, x2h, x4l, x4h, x4l, x4h, 1, x4l, x4h, 6, a3
    linear  x4l, x4h, x1l, x1h, x1l, x1h, 7, x1h, x1l, 9, a3
    linear  x1l, x1h, x3l, x3h, x3h, x3l, 29, x3h, x3l, 7, a3
    linear  x3l, x3h, t0l, t0h, t0l, t0h, 10, t0l, t0h, 17, a3
    # condition
    # step to the next round constant
    addi    a2, a2, -15
    floopend a3, Ploop
    # write x2, x3 back to the stack frame; x4 stays in a14, a15
    s32i    x2h, a1, (S_state + 0)
    s32i    x2l, a1, (S_state + 4)
    s32i    x3h, a1, (S_state + 8)
    s32i    x3l, a1, (S_state + 12)
    ret
.align 4
.globl ascon_rev8
.type ascon_rev8,@function
# ascon bytereverse one block
# reverses the byte order of each of the four 32-bit words independently
# arguments and results in a4, a5, a14, a15
# temporaries in a2; SAR is left set to 8
# per word [b3 b2 b1 b0]: srli gives [0 0 b3 b2], the first src gives
# [b2 b3 b2 b1], the second [b1 b2 b3 b2], the last one [b0 b1 b2 b3]
ascon_rev8:
    ssai    8
    srli    a2, t1h, 16
    src     a2, a2, t1h
    src     a2, a2, a2
    src     t1h, t1h, a2
    srli    a2, t1l, 16
    src     a2, a2, t1l
    src     a2, a2, a2
    src     t1l, t1l, a2
    # falls through into ascon_rev8_half for a4, a5
.globl ascon_rev8_half
.type ascon_rev8_half,@function
# bytereverse a4 and a5 only; a14, a15 are left untouched
# sets SAR itself so that it can be entered directly
ascon_rev8_half:
    ssai    8
    srli    a2, t0h, 16
    src     a2, a2, t0h
    src     a2, a2, a2
    src     t0h, t0h, a2
    srli    a2, t0l, 16
    src     a2, a2, t0l
    src     a2, a2, a2
    src     t0l, t0l, a2
    ret
.align 4
.globl ascon_memcpy
.type ascon_memcpy,@function
# memcpy that preserves registers used by ascon
# dest in a2
# src in a3
# byte count in ilen (a12); a count of zero copies nothing
# temporaries in a4, a5; a2 and a3 end up advanced past the copied bytes
ascon_memcpy:
    movi    a4, 0
    j       .LMcond
.LMloop:
    l8ui    a5, a3, 0
    s8i     a5, a2, 0
    addi    a2, a2, 1
    addi    a3, a3, 1
    addi    a4, a4, 1
.LMcond:
    bltu    a4, ilen, .LMloop
    ret
.align 4
.globl ascon_duplex
.type ascon_duplex,@function
# ascon duplex: xor ilen bytes from iptr into the rate word x0, permuting
# after every full RATE-byte block; the padded final block is xored in but
# not permuted
# optr, iptr, ilen, mode in a10 .. a13; x2, x3 live in the stack frame and
# x4 stays in a14, a15
# mode as set up by ascon_core: 0 for associated data, otherwise the
# sign-extended mode argument (negative selects the branches that copy the
# input block back into the rate word)
# on exit optr and iptr have been advanced by the number of bytes processed
# the return address is parked in S_lr2 because of the nested call0s
# NOTE(review): the entry label, the return instruction and the branch
# targets .LDloop, .LDcond, .LDreset, .LDpermute, .LDendreset and .LDreturn
# are referenced but not present in the visible lines; their positions have
# to be confirmed before this assembles
# NOTE(review): the word loads and stores assume 4-byte aligned iptr and
# optr - confirm against the callers
    s32i    a0, a1, S_lr2
    j       .LDcond
    # full block: load RATE bytes and convert them to big-endian words
    l32i    t0h, iptr, 0
    l32i    t0l, iptr, 4
    call0   ascon_rev8_half
    # xor the block into the rate word
    xor     x0h, x0h, t0h
    xor     x0l, x0l, t0l
    beqz    a13, .LDreset           # a13 is mode
    # ascon_rev8
    # inlined here to preserve registers
    # the swapped words go to a2 only, x0 stays intact
    ssai    8
    srli    a2, x0h, 16
    src     a2, a2, x0h
    src     a2, a2, a2
    src     a2, x0h, a2
    s32i    a2, optr, 0
    srli    a2, x0l, 16
    src     a2, a2, x0l
    src     a2, a2, a2
    src     a2, x0l, a2
    s32i    a2, optr, 4
    bgez    mode, .LDpermute
    # negative mode: the rate word becomes the input block
    mov     x0h, t0h
    mov     x0l, t0l
    # optr, iptr, ilen share registers with x2h .. x3h: spill them
    s32i    optr, a1, S_optr_cur
    s32i    iptr, a1, S_iptr_cur
    s32i    ilen, a1, S_ilen_cur
    # ascon_permute takes its start round constant in a2, which held only
    # byte-swap scratch at this point
    movi    a2, PB_START_ROUND
    movi    a3, PB_ROUNDS
    call0   ascon_permute
    l32i    optr, a1, S_optr_cur
    l32i    iptr, a1, S_iptr_cur
    l32i    ilen, a1, S_ilen_cur
    l32i    mode, a1, S_mode_cur
    addi    optr, optr, RATE
    addi    iptr, iptr, RATE
    addi    ilen, ilen, -RATE
    bgeui   ilen, RATE, .LDloop
    # final partial block: clear the RATE-byte buffer at sp + 0, copy the
    # remaining ilen bytes into it and flip 0x80 in the byte after them
    movi    a2, 0
    s32i    a2, a1, 0
    s32i    a2, a1, 4
    mov     a2, a1
    mov     a3, iptr
    call0   ascon_memcpy
    movi    a4, 0x80
    add     a2, a1, ilen
    l8ui    a3, a2, 0
    xor     a3, a3, a4
    s8i     a3, a2, 0
    l32i    t0h, a1, 0
    l32i    t0l, a1, 4
    call0   ascon_rev8_half
    xor     x0h, x0h, t0h
    xor     x0l, x0l, t0l
    beqz    mode, .LDendreset
    # output: byte-swap the rate word into the buffer, copy ilen bytes out
    mov     t0h, x0h
    mov     t0l, x0l
    call0   ascon_rev8_half
    s32i    t0h, a1, 0
    s32i    t0l, a1, 4
    mov     a2, optr
    mov     a3, a1
    call0   ascon_memcpy
    bgez    mode, .LDreturn
    # negative mode: put the ilen input bytes back over the start of the
    # buffer and reload the rate word from it
    mov     a2, a1
    mov     a3, iptr
    call0   ascon_memcpy
    l32i    t0h, a1, 0
    l32i    t0l, a1, 4
    call0   ascon_rev8_half
    mov     x0h, t0h
    mov     x0l, t0l
    # step both pointers past the partial block
    add     optr, optr, ilen
    add     iptr, iptr, ilen
    l32i    a0, a1, S_lr2
.align 4
.globl ascon_core
.type ascon_core,@function
# ascon_core(outptr, inptr, inlen, adptr, adlen, nptr, kptr, mode)
# arguments in a2 .. a7, kptr and mode on the stack (S_kptr_arg, S_mode_arg)
# returns 0 in a2, or -1 when mode is negative and the tag does not match
# NOTE(review): S_kptr_arg is the frame size plus 16, which looks like the
# windowed ABI layout - confirm the intended ABI
ascon_core:
    abi_entry 80, 4
    s32i    a0, a1, S_lr
    s32i    a2, a1, S_optr
    s32i    a3, a1, S_iptr
    s32i    a4, a1, S_ilen
    # adptr and adlen go straight into the slots read before the first duplex call
    s32i    a5, a1, S_iptr_cur
    s32i    a6, a1, S_ilen_cur
    # load key
    l32i    a2, a1, S_kptr_arg
    l32i    t0h, a2, 0
    l32i    t0l, a2, 4
    l32i    t1h, a2, 8
    l32i    t1l, a2, 12
    call0   ascon_rev8
    s32i    t0h, a1, (S_key + 0)
    s32i    t0l, a1, (S_key + 4)
    s32i    t1h, a1, (S_key + 8)
    s32i    t1l, a1, (S_key + 12)
    mov     x1h, t0h
    mov     x1l, t0l
    mov     x2h, t1h
    mov     x2l, t1l
    # load nonce
    # a7 is not clobbered by ascon_rev8
    # a7 does not overlap x1, x2, t0, or t1
    # x4 overlaps t1, move unnecessary
    mov     a2, a7
    l32i    t0h, a2, 0
    l32i    t0l, a2, 4
    l32i    t1h, a2, 8
    l32i    t1l, a2, 12
    call0   ascon_rev8
    mov     x3h, t0h
    mov     x3l, t0l
    # load IV
    # this clobbers a7
    movi    x0h, IVh
    movi    x0l, IVl
    # a2 held byte-swap scratch; the permutation expects its start round
    # constant there
    movi    a2, PA_START_ROUND
    movi    a3, PA_ROUNDS
    call0   ascon_permute_noload
    # xor key
    # x4 overlaps t1, do in two steps
    l32i    t0h, a1, (S_key + 0)
    l32i    t0l, a1, (S_key + 4)
    xor     x3h, x3h, t0h
    xor     x3l, x3l, t0l
    l32i    t0h, a1, (S_key + 8)
    l32i    t0l, a1, (S_key + 12)
    xor     x4h, x4h, t0h
    xor     x4l, x4l, t0l
    # save state
    # only x2 and x3 are spilled; x4 stays in a14, a15
    s32i    x2h, a1, (S_state + 0)
    s32i    x2l, a1, (S_state + 4)
    s32i    x3h, a1, (S_state + 8)
    s32i    x3l, a1, (S_state + 12)
    # associated data, skipped entirely when adlen is zero
    l32i    ilen, a1, S_ilen_cur
    beqz    ilen, .LCskipad
    l32i    iptr, a1, S_iptr_cur
    movi    mode, 0
    s32i    mode, a1, S_mode_cur
    call0   ascon_duplex
    movi    a2, PB_START_ROUND
    movi    a3, PB_ROUNDS
    call0   ascon_permute
.LCskipad:
    # flip the lowest state bit
    movi    a2, 1
    xor     x4l, x4l, a2
    # message: mode is the sign-extended low byte of the mode argument
    l32i    optr, a1, S_optr
    l32i    iptr, a1, S_iptr
    l32i    ilen, a1, S_ilen
    l8ui    mode, a1, S_mode_arg
    sext    mode, mode, 7
    s32i    mode, a1, S_mode_cur
    call0   ascon_duplex
    # optr and iptr now point just past the message; keep them for the tag
    s32i    optr, a1, S_optr_cur
    s32i    iptr, a1, S_iptr_cur
    # restore state
    l32i    x2h, a1, (S_state + 0)
    l32i    x2l, a1, (S_state + 4)
    l32i    x3h, a1, (S_state + 8)
    l32i    x3l, a1, (S_state + 12)
    # xor key
    # x4 overlaps t1, do in two steps
    l32i    t0h, a1, (S_key + 0)
    l32i    t0l, a1, (S_key + 4)
    xor     x1h, x1h, t0h
    xor     x1l, x1l, t0l
    l32i    t0h, a1, (S_key + 8)
    l32i    t0l, a1, (S_key + 12)
    xor     x2h, x2h, t0h
    xor     x2l, x2l, t0l
    movi    a2, PA_START_ROUND
    movi    a3, PA_ROUNDS
    call0   ascon_permute_noload
    # xor key
    # x4 overlaps t1, do in two steps
    # x3 and x4 now hold the tag
    l32i    t0h, a1, (S_key + 0)
    l32i    t0l, a1, (S_key + 4)
    xor     x3h, x3h, t0h
    xor     x3l, x3l, t0l
    l32i    t0h, a1, (S_key + 8)
    l32i    t0l, a1, (S_key + 12)
    xor     x4h, x4h, t0h
    xor     x4l, x4l, t0l
    l32i    a2, a1, S_mode_cur
    bgez    a2, .LCencrypt
    # save x4 into x0
    # x0 is no longer needed
    # x4 overlaps t1
    mov     x0h, x4h
    mov     x0l, x4l
    # read the 16 tag bytes that follow the input
    l32i    a2, a1, S_iptr_cur
    l32i    t0h, a2, 0
    l32i    t0l, a2, 4
    l32i    t1h, a2, 8
    l32i    t1l, a2, 12
    call0   ascon_rev8
    # check tag
    # x4 is in x0
    # xor the four word differences together as computed below
    xor     a2, x3h, t0h
    xor     a3, x3l, t0l
    xor     a2, a2, a3
    xor     a3, x0h, t1h
    xor     a2, a2, a3
    xor     a3, x0l, t1l
    xor     a2, a2, a3
    beqz    a2, .LCzeroreturn
    movi    a2, -1
    j       .LCreturn
.LCencrypt:
    # store tag
    # x4 overlaps t1, move unnecessary
    mov     t0h, x3h
    mov     t0l, x3l
    call0   ascon_rev8
    l32i    a2, a1, S_optr_cur
    s32i    t0h, a2, 0
    s32i    t0l, a2, 4
    s32i    t1h, a2, 8
    s32i    t1l, a2, 12
.LCzeroreturn:
    movi    a2, 0
.LCreturn:
    l32i    a0, a1, S_lr
    abi_return
#include "api.h"
/*
 * Assembly core shared by crypto_aead_encrypt and crypto_aead_decrypt.
 * The visible wrappers pass mode 1 to encrypt and -1 (255 once converted to
 * unsigned char) to decrypt, and return this function's result unchanged.
 * Lengths are unsigned int here, while the wrappers receive unsigned long
 * long lengths and convert them implicitly.
 */
int ascon_core(unsigned char * outptr,
const unsigned char * inptr, unsigned int inlen,
const unsigned char * adptr, unsigned int adlen,
const unsigned char * nptr, const unsigned char * kptr,
unsigned char mode);
#include "ascon.h"

/*
 * Decrypt and verify: c holds clen bytes, the last CRYPTO_ABYTES of which
 * are the tag. Writes the plaintext to m and its length to *mlen.
 * Returns -1 (with *mlen = 0) when clen is too short to contain a tag,
 * otherwise whatever ascon_core returns. nsec is unused.
 */
int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen,
                        unsigned char* nsec, const unsigned char* c,
                        unsigned long long clen, const unsigned char* ad,
                        unsigned long long adlen, const unsigned char* npub,
                        const unsigned char* k) {
  (void)nsec;
  if (clen < CRYPTO_ABYTES) {
    *mlen = 0;
    return -1;
  }
  *mlen = clen - CRYPTO_ABYTES;
  /* mode -1 selects decryption in ascon_core */
  return ascon_core(m, c, *mlen, ad, adlen, npub, k, -1);
}
#include "ascon.h"

/*
 * Encrypt mlen bytes from m into c and report the output length
 * (mlen + CRYPTO_ABYTES) in *clen. Returns whatever ascon_core returns.
 * nsec is unused.
 */
int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen,
                        const unsigned char* m, unsigned long long mlen,
                        const unsigned char* ad, unsigned long long adlen,
                        const unsigned char* nsec, const unsigned char* npub,
                        const unsigned char* k) {
  (void)nsec;
  *clen = mlen + CRYPTO_ABYTES;
  /* mode 1 selects encryption in ascon_core */
  return ascon_core(c, m, mlen, ad, adlen, npub, k, 1);
}
/* Bytes that encryption adds to the message length (see the C wrappers). */
#define CRYPTO_ABYTES 16
/* floop, floopend and abi_entry used further down presumably come from here. */
#include <xtensa/coreasm.h>
#include "api.h"
/* Register aliases: tN are scratch pairs, xN are the 64-bit state words; */
/* the h and l suffixes name the two 32-bit halves of each pair. */
#define t0h a4
#define t0l a5
#define x0h a6
#define x0l a7
#define x1h a8
#define x1l a9
#define x2h a10
#define x2l a11
#define x3h a12
#define x3l a13
#define x4h a14
#define x4l a15
/* The aliases below share registers with x2, x3 and x4; x2 and x3 are kept */
/* in the stack frame while optr, iptr, ilen and mode are in use. */
#define optr x2h
#define iptr x2l
#define ilen x3h
#define mode x3l
#define t1h x4h
#define t1l x4l
## +-----------+-----------+-----------+------------+-----------+
## | RATE 16 | RATE 8 | RATE 8 | RATE 8 | RATE 8 |
## | PA 12 | PA 12 | PA 12 | PA 12 | PA 12 |
## | PB 8 | PB 6 | PB 6 | PB 8 | PB 12 |
## | KEY 16 | KEY 16 | KEY 20 | | |
## +-----------+-----------+-----------+------------+-----------+
## 0 | bytes | bytes | bytes | bytes | bytes |
## 4 | | | \---- | \---- | \---- | \---- |
## 8 | | | optr | optr | optr | optr |
## 12 | \---- | iptr | iptr | iptr cur | iptr cur |
## 16 | state x2h | state x2h | state x2h | | |
## 20 | | x2l | | x2l | | x2l | state x2l | state x2l |
## 24 | | x3h | | x3h | | x3h | \---- x3h | \---- x3h |
## 28 | | x3l | \---- x3l | \---- x3l | | |
## 32 | | x4h | ilen | ilen | ilen cur | ilen cur |
## 36 | \---- x4l | mode cur | mode cur | olen | olen |
## 40 | key k0h | key k0h | key k1h | | |
## 44 | | k0l | | k0l | | k1l | lr | lr |
## 48 | | k1h | | k1h | | k2h +------------+-----------+
## 52 | \---- k1l | \---- k1l | | k2l |
## 56 | | | \---- k0h |
## 60 | optr cur | optr cur | optr cur |
## 64 | iptr cur | iptr cur | iptr cur |
## 68 | ilen cur | ilen cur | ilen cur |
## 72 | mode cur | lr2 | lr2 |
## 76 | optr | lr | lr |
## 80 | iptr +-----------+-----------+
## 84 | ilen | | |
## 88 | lr2 | | |
## 92 | lr +-----------+-----------+
## 96 +-----------+ kptr arg | kptr arg |
## 100 | | mode arg | mode arg |
## 104 | +-----------+-----------+
## 108 +-----------+
## 112 | kptr arg |
## 116 | mode arg |
## 120 +-----------+
## presumably the ASCON80pq variant: the core below reads a 20-byte key
## (five key words, the first one kept at S_key + 16) - confirm
## bytes processed per duplex block
#define RATE 8
## round count handed to the permutation in a3
#define PA_ROUNDS 12
## first round constant; the permutation loop subtracts 15 after every round
#define PA_START_ROUND 0xf0
#define PB_ROUNDS 6
#define PB_START_ROUND 0x96
## high word of the initial x0: key bits, rate bits, PA and PB round counts,
## one byte each. CRYPTO_KEYBYTES is not defined in the visible lines and is
## presumably provided by api.h.
#define IVh (((8 * CRYPTO_KEYBYTES) << 24) | ((8 * RATE) << 16) | (PA_ROUNDS << 8) | (PB_ROUNDS << 0))
#define IVl 0
## stack frame offsets relative to a1 (see the frame diagram above)
## S_state holds x2h .. x3l, S_key holds five byte-swapped key words
#define S_state 16
#define S_key 40
## working copies used by ascon_duplex around the permutation calls
#define S_optr_cur 60
#define S_iptr_cur 64
#define S_ilen_cur 68
#define S_mode_cur 36
## message arguments saved on entry to ascon_core
#define S_optr 8
#define S_iptr 12
#define S_ilen 32
## return addresses: S_lr2 for ascon_duplex, S_lr for ascon_core
#define S_lr2 72
#define S_lr 76
## stack-passed arguments of ascon_core
#define S_kptr_arg 96
#define S_mode_arg 100
## sbox: bit-sliced Ascon S-box on one 32-bit half of the five state words.
## \x0 .. \x4 are the inputs; \t0, \t1, \t2 are scratch registers.
## Results are left in rotated positions, matching the linear-layer calls in
## ascon_permute_noload:
##   \x2 feeds the new x0, \x3 the new x1, \x4 the new x2,
##   \r0 feeds the new x3, \x1 the new x4.
## \r0 is written by the last instruction only, so it may name the same
## register as \t0 or \t1 (the callers do this).
.macro sbox x0, x1, x2, x3, x4, r0, t0, t1, t2
    xor     \t1, \x0, \x4           # t1 = x0 ^ x4
    xor     \t2, \x3, \x4           # t2 = x3 ^ x4
    movi    \t0, -1
    xor     \x4, \x4, \t0           # x4 = ~x4
    xor     \t0, \x1, \x2           # t0 = x1 ^ x2
    or      \x4, \x4, \x3
    xor     \x4, \x4, \t0           # x4 = (~x4 | x3) ^ t0
    xor     \x3, \x3, \x1
    or      \x3, \x3, \t0
    xor     \x3, \x3, \t1           # x3 = ((x3 ^ x1) | t0) ^ t1
    xor     \x2, \x2, \t1
    or      \x2, \x2, \x1
    xor     \x2, \x2, \t2           # x2 = ((x2 ^ t1) | x1) ^ t2
    or      \x0, \x0, \t2
    xor     \x0, \x0, \t0           # x0 = (x0 | t2) ^ t0
    movi    \t0, -1
    xor     \t1, \t1, \t0           # t1 = ~t1
    and     \x1, \x1, \t1
    xor     \x1, \x1, \t2           # x1 = (x1 & ~t1) ^ t2
    mov     \r0, \x0                # hand the x0 result over in r0
.endm
## linear: one word of the Ascon linear layer on a 64-bit value held in two
## 32-bit registers:  d = s ^ ror64(s, R0) ^ ror64(s, R1)
##   \dl, \dh    destination low/high (must differ from all source registers)
##   \sl, \sh    source low/high
##   \sl0, \sh0  source halves as used for the first rotation by \r0
##   \sl1, \sh1  source halves as used for the second rotation by \r1
##   \t0         scratch
## \r0 and \r1 are shift amounts below 32; for a 64-bit rotation of 32 or
## more the caller passes the halves swapped and the amount minus 32.
## Clobbers \sh, \t0 and SAR.
.macro linear dl, dh, sl, sh, sl0, sh0, r0, sl1, sh1, r1, t0
    ssai    \r0
    src     \dl, \sh0, \sl0         # low word of the first rotation
    src     \dh, \sl0, \sh0         # high word of the first rotation
    xor     \dl, \dl, \sl
    xor     \dh, \dh, \sh
    ssai    \r1
    src     \t0, \sh1, \sl1         # low word of the second rotation
    src     \sh, \sl1, \sh1         # high word, overwrites the source high half
    xor     \dl, \dl, \t0
    xor     \dh, \dh, \sh
.endm
.align 4
.globl ascon_permute
.type ascon_permute,@function
# ascon permutation
# x0, x1 in a6 .. a9, x4 in a14, a15; x2, x3 are loaded from
# sp + S_state .. sp + S_state + 12
# start round constant in a2
# round count in a3
# temporaries in a3, a4, a5
# on return the whole state is in a6 .. a15 and x2, x3 are also stored
# back to the same stack slots
ascon_permute:
    l32i    x2h, a1, (S_state + 0)
    l32i    x2l, a1, (S_state + 4)
    l32i    x3h, a1, (S_state + 8)
    l32i    x3l, a1, (S_state + 12)
    # falls through into ascon_permute_noload
.globl ascon_permute_noload
.type ascon_permute_noload,@function
# state in a6 .. a15
# start round constant in a2
# round count in a3
# temporaries in a3, a4, a5
ascon_permute_noload:
    # ESP32 zero-overhead looping
    # a3 is reused as scratch inside the loop body, so this relies on floop
    # expanding to the hardware loop instruction instead of counting in a3
    # NOTE(review): confirm the loop option is enabled for the target core
    floop   a3, Ploop
    # round constant
    xor     x2l, x2l, a2
    # s-box
    # results: x2 -> new x0, x3 -> new x1, x4 -> new x2, t0 -> new x3, x1 -> new x4
    sbox    x0l, x1l, x2l, x3l, x4l, t0l, t0h, t0l, a3
    sbox    x0h, x1h, x2h, x3h, x4h, t0h, t0h, x0l, a3
    # linear layer
    # every call writes the register pair that the previous call has just
    # consumed; rotations of 32 or more are passed with swapped halves
    linear  x0l, x0h, x2l, x2h, x2l, x2h, 19, x2l, x2h, 28, a3
    linear  x2l, x2h, x4l, x4h, x4l, x4h, 1, x4l, x4h, 6, a3
    linear  x4l, x4h, x1l, x1h, x1l, x1h, 7, x1h, x1l, 9, a3
    linear  x1l, x1h, x3l, x3h, x3h, x3l, 29, x3h, x3l, 7, a3
    linear  x3l, x3h, t0l, t0h, t0l, t0h, 10, t0l, t0h, 17, a3
    # condition
    # step to the next round constant
    addi    a2, a2, -15
    floopend a3, Ploop
    # write x2, x3 back to the stack frame; x4 stays in a14, a15
    s32i    x2h, a1, (S_state + 0)
    s32i    x2l, a1, (S_state + 4)
    s32i    x3h, a1, (S_state + 8)
    s32i    x3l, a1, (S_state + 12)
    ret
.align 4
.globl ascon_rev8
.type ascon_rev8,@function
# ascon bytereverse one block
# reverses the byte order of each of the four 32-bit words independently
# arguments and results in a4, a5, a14, a15
# temporaries in a2; SAR is left set to 8
# per word [b3 b2 b1 b0]: srli gives [0 0 b3 b2], the first src gives
# [b2 b3 b2 b1], the second [b1 b2 b3 b2], the last one [b0 b1 b2 b3]
ascon_rev8:
    ssai    8
    srli    a2, t1h, 16
    src     a2, a2, t1h
    src     a2, a2, a2
    src     t1h, t1h, a2
    srli    a2, t1l, 16
    src     a2, a2, t1l
    src     a2, a2, a2
    src     t1l, t1l, a2
    # falls through into ascon_rev8_half for a4, a5
.globl ascon_rev8_half
.type ascon_rev8_half,@function
# bytereverse a4 and a5 only; a14, a15 are left untouched
# sets SAR itself so that it can be entered directly
ascon_rev8_half:
    ssai    8
    srli    a2, t0h, 16
    src     a2, a2, t0h
    src     a2, a2, a2
    src     t0h, t0h, a2
    srli    a2, t0l, 16
    src     a2, a2, t0l
    src     a2, a2, a2
    src     t0l, t0l, a2
    ret
.align 4
.globl ascon_memcpy
.type ascon_memcpy,@function
# memcpy that preserves registers used by ascon
# dest in a2
# src in a3
# byte count in ilen (a12); a count of zero copies nothing
# temporaries in a4, a5; a2 and a3 end up advanced past the copied bytes
ascon_memcpy:
    movi    a4, 0
    j       .LMcond
.LMloop:
    l8ui    a5, a3, 0
    s8i     a5, a2, 0
    addi    a2, a2, 1
    addi    a3, a3, 1
    addi    a4, a4, 1
.LMcond:
    bltu    a4, ilen, .LMloop
    ret
.align 4
.globl ascon_duplex
.type ascon_duplex,@function
# ascon duplex: xor ilen bytes from iptr into the rate word x0, permuting
# after every full RATE-byte block; the padded final block is xored in but
# not permuted
# optr, iptr, ilen, mode in a10 .. a13; x2, x3 live in the stack frame and
# x4 stays in a14, a15
# mode as set up by ascon_core: 0 for associated data, otherwise the
# sign-extended mode argument (negative selects the branches that copy the
# input block back into the rate word)
# on exit optr and iptr have been advanced by the number of bytes processed
# the return address is parked in S_lr2 because of the nested call0s
# NOTE(review): the entry label, the return instruction and the branch
# targets .LDloop, .LDcond, .LDreset, .LDpermute, .LDendreset and .LDreturn
# are referenced but not present in the visible lines; their positions have
# to be confirmed before this assembles
# NOTE(review): the word loads and stores assume 4-byte aligned iptr and
# optr - confirm against the callers
    s32i    a0, a1, S_lr2
    j       .LDcond
    # full block: load RATE bytes and convert them to big-endian words
    l32i    t0h, iptr, 0
    l32i    t0l, iptr, 4
    call0   ascon_rev8_half
    # xor the block into the rate word
    xor     x0h, x0h, t0h
    xor     x0l, x0l, t0l
    beqz    a13, .LDreset           # a13 is mode
    # ascon_rev8
    # inlined here to preserve registers
    # the swapped words go to a2 only, x0 stays intact
    ssai    8
    srli    a2, x0h, 16
    src     a2, a2, x0h
    src     a2, a2, a2
    src     a2, x0h, a2
    s32i    a2, optr, 0
    srli    a2, x0l, 16
    src     a2, a2, x0l
    src     a2, a2, a2
    src     a2, x0l, a2
    s32i    a2, optr, 4
    bgez    mode, .LDpermute
    # negative mode: the rate word becomes the input block
    mov     x0h, t0h
    mov     x0l, t0l
    # optr, iptr, ilen share registers with x2h .. x3h: spill them
    s32i    optr, a1, S_optr_cur
    s32i    iptr, a1, S_iptr_cur
    s32i    ilen, a1, S_ilen_cur
    # ascon_permute takes its start round constant in a2, which held only
    # byte-swap scratch at this point
    movi    a2, PB_START_ROUND
    movi    a3, PB_ROUNDS
    call0   ascon_permute
    l32i    optr, a1, S_optr_cur
    l32i    iptr, a1, S_iptr_cur
    l32i    ilen, a1, S_ilen_cur
    l32i    mode, a1, S_mode_cur
    addi    optr, optr, RATE
    addi    iptr, iptr, RATE
    addi    ilen, ilen, -RATE
    bgeui   ilen, RATE, .LDloop
    # final partial block: clear the RATE-byte buffer at sp + 0, copy the
    # remaining ilen bytes into it and flip 0x80 in the byte after them
    movi    a2, 0
    s32i    a2, a1, 0
    s32i    a2, a1, 4
    mov     a2, a1
    mov     a3, iptr
    call0   ascon_memcpy
    movi    a4, 0x80
    add     a2, a1, ilen
    l8ui    a3, a2, 0
    xor     a3, a3, a4
    s8i     a3, a2, 0
    l32i    t0h, a1, 0
    l32i    t0l, a1, 4
    call0   ascon_rev8_half
    xor     x0h, x0h, t0h
    xor     x0l, x0l, t0l
    beqz    mode, .LDendreset
    # output: byte-swap the rate word into the buffer, copy ilen bytes out
    mov     t0h, x0h
    mov     t0l, x0l
    call0   ascon_rev8_half
    s32i    t0h, a1, 0
    s32i    t0l, a1, 4
    mov     a2, optr
    mov     a3, a1
    call0   ascon_memcpy
    bgez    mode, .LDreturn
    # negative mode: put the ilen input bytes back over the start of the
    # buffer and reload the rate word from it
    mov     a2, a1
    mov     a3, iptr
    call0   ascon_memcpy
    l32i    t0h, a1, 0
    l32i    t0l, a1, 4
    call0   ascon_rev8_half
    mov     x0h, t0h
    mov     x0l, t0l
    # step both pointers past the partial block
    add     optr, optr, ilen
    add     iptr, iptr, ilen
    l32i    a0, a1, S_lr2
.align 4
.globl ascon_core
.type ascon_core,@function
# ascon_core(outptr, inptr, inlen, adptr, adlen, nptr, kptr, mode)
# arguments in a2 .. a7, kptr and mode on the stack (S_kptr_arg, S_mode_arg)
# the key is read as five 32-bit words (20 bytes)
# returns 0 in a2, or -1 when mode is negative and the tag does not match
# NOTE(review): S_kptr_arg is the frame size plus 16, which looks like the
# windowed ABI layout - confirm the intended ABI
ascon_core:
    abi_entry 80, 4
    s32i    a0, a1, S_lr
    s32i    a2, a1, S_optr
    s32i    a3, a1, S_iptr
    s32i    a4, a1, S_ilen
    # adptr and adlen go straight into the slots read before the first duplex call
    s32i    a5, a1, S_iptr_cur
    s32i    a6, a1, S_ilen_cur
    # load key
    # first key word: byte-swapped inline (t0l as scratch) and kept at S_key + 16
    l32i    a2, a1, S_kptr_arg
    l32i    t0h, a2, 0
    ssai    8
    srli    t0l, t0h, 16
    src     t0l, t0l, t0h
    src     t0l, t0l, t0l
    src     t0h, t0h, t0l
    s32i    t0h, a1, (S_key + 16)
    # remaining four key words
    l32i    t0h, a2, 4
    l32i    t0l, a2, 8
    l32i    t1h, a2, 12
    l32i    t1l, a2, 16
    call0   ascon_rev8
    s32i    t0h, a1, (S_key + 0)
    s32i    t0l, a1, (S_key + 4)
    s32i    t1h, a1, (S_key + 8)
    s32i    t1l, a1, (S_key + 12)
    mov     x1h, t0h
    mov     x1l, t0l
    mov     x2h, t1h
    mov     x2l, t1l
    # load nonce
    # a7 is not clobbered by ascon_rev8
    # a7 does not overlap x1, x2, t0, or t1
    # x4 overlaps t1, move unnecessary
    mov     a2, a7
    l32i    t0h, a2, 0
    l32i    t0l, a2, 4
    l32i    t1h, a2, 8
    l32i    t1l, a2, 12
    call0   ascon_rev8
    mov     x3h, t0h
    mov     x3l, t0l
    # load IV
    movi    x0h, IVh
    # load K0.h
    # this clobbers a7
    l32i    x0l, a1, (S_key + 16)
    # a2 held byte-swap scratch; the permutation expects its start round
    # constant there
    movi    a2, PA_START_ROUND
    movi    a3, PA_ROUNDS
    call0   ascon_permute_noload
    # xor key
    # x4 overlaps t1, do in two steps
    # all five key words go into x2l, x3 and x4
    l32i    t0h, a1, (S_key + 16)
    xor     x2l, x2l, t0h
    l32i    t0h, a1, (S_key + 0)
    l32i    t0l, a1, (S_key + 4)
    xor     x3h, x3h, t0h
    xor     x3l, x3l, t0l
    l32i    t0h, a1, (S_key + 8)
    l32i    t0l, a1, (S_key + 12)
    xor     x4h, x4h, t0h
    xor     x4l, x4l, t0l
    # save state
    # only x2 and x3 are spilled; x4 stays in a14, a15
    s32i    x2h, a1, (S_state + 0)
    s32i    x2l, a1, (S_state + 4)
    s32i    x3h, a1, (S_state + 8)
    s32i    x3l, a1, (S_state + 12)
    # associated data, skipped entirely when adlen is zero
    l32i    ilen, a1, S_ilen_cur
    beqz    ilen, .LCskipad
    l32i    iptr, a1, S_iptr_cur
    movi    mode, 0
    s32i    mode, a1, S_mode_cur
    call0   ascon_duplex
    movi    a2, PB_START_ROUND
    movi    a3, PB_ROUNDS
    call0   ascon_permute
.LCskipad:
    # flip the lowest state bit
    movi    a2, 1
    xor     x4l, x4l, a2
    # message: mode is the sign-extended low byte of the mode argument
    l32i    optr, a1, S_optr
    l32i    iptr, a1, S_iptr
    l32i    ilen, a1, S_ilen
    l8ui    mode, a1, S_mode_arg
    sext    mode, mode, 7
    s32i    mode, a1, S_mode_cur
    call0   ascon_duplex
    # optr and iptr now point just past the message; keep them for the tag
    s32i    optr, a1, S_optr_cur
    s32i    iptr, a1, S_iptr_cur
    # restore state
    l32i    x2h, a1, (S_state + 0)
    l32i    x2l, a1, (S_state + 4)
    l32i    x3h, a1, (S_state + 8)
    l32i    x3l, a1, (S_state + 12)
    # xor key
    # x4 overlaps t1, do in two steps
    # the five key words go into x1h, x1l, x2h, x2l, x3h in that order
    l32i    t0h, a1, (S_key + 16)
    xor     x1h, x1h, t0h
    l32i    t0h, a1, (S_key + 0)
    l32i    t0l, a1, (S_key + 4)
    xor     x1l, x1l, t0h
    xor     x2h, x2h, t0l
    l32i    t0h, a1, (S_key + 8)
    l32i    t0l, a1, (S_key + 12)
    xor     x2l, x2l, t0h
    xor     x3h, x3h, t0l
    movi    a2, PA_START_ROUND
    movi    a3, PA_ROUNDS
    call0   ascon_permute_noload
    # xor key
    # x4 overlaps t1, do in two steps
    # only the last four key words are used here; x3 and x4 now hold the tag
    l32i    t0h, a1, (S_key + 0)
    l32i    t0l, a1, (S_key + 4)
    xor     x3h, x3h, t0h
    xor     x3l, x3l, t0l
    l32i    t0h, a1, (S_key + 8)
    l32i    t0l, a1, (S_key + 12)
    xor     x4h, x4h, t0h
    xor     x4l, x4l, t0l
    l32i    a2, a1, S_mode_cur
    bgez    a2, .LCencrypt
    # save x4 into x0
    # x0 is no longer needed
    # x4 overlaps t1
    mov     x0h, x4h
    mov     x0l, x4l
    # read the 16 tag bytes that follow the input
    l32i    a2, a1, S_iptr_cur
    l32i    t0h, a2, 0
    l32i    t0l, a2, 4
    l32i    t1h, a2, 8
    l32i    t1l, a2, 12
    call0   ascon_rev8
    # check tag
    # x4 is in x0
    # xor the four word differences together as computed below
    xor     a2, x3h, t0h
    xor     a3, x3l, t0l
    xor     a2, a2, a3
    xor     a3, x0h, t1h
    xor     a2, a2, a3
    xor     a3, x0l, t1l
    xor     a2, a2, a3
    beqz    a2, .LCzeroreturn
    movi    a2, -1
    j       .LCreturn
.LCencrypt:
    # store tag
    # x4 overlaps t1, move unnecessary
    mov     t0h, x3h
    mov     t0l, x3l
    call0   ascon_rev8
    l32i    a2, a1, S_optr_cur
    s32i    t0h, a2, 0
    s32i    t0l, a2, 4
    s32i    t1h, a2, 8
    s32i    t1l, a2, 12
.LCzeroreturn:
    movi    a2, 0
.LCreturn:
    l32i    a0, a1, S_lr
    abi_return
#include "api.h"
/*
 * Assembly core shared by crypto_aead_encrypt and crypto_aead_decrypt.
 * The visible wrappers pass mode 1 to encrypt and -1 (255 once converted to
 * unsigned char) to decrypt, and return this function's result unchanged.
 * Lengths are unsigned int here, while the wrappers receive unsigned long
 * long lengths and convert them implicitly.
 */
int ascon_core(unsigned char * outptr,
const unsigned char * inptr, unsigned int inlen,
const unsigned char * adptr, unsigned int adlen,
const unsigned char * nptr, const unsigned char * kptr,
unsigned char mode);
#include "ascon.h"

/*
 * Decrypt and verify: c holds clen bytes, the last CRYPTO_ABYTES of which
 * are the tag. Writes the plaintext to m and its length to *mlen.
 * Returns -1 (with *mlen = 0) when clen is too short to contain a tag,
 * otherwise whatever ascon_core returns. nsec is unused.
 */
int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen,
                        unsigned char* nsec, const unsigned char* c,
                        unsigned long long clen, const unsigned char* ad,
                        unsigned long long adlen, const unsigned char* npub,
                        const unsigned char* k) {
  (void)nsec;
  if (clen < CRYPTO_ABYTES) {
    *mlen = 0;
    return -1;
  }
  *mlen = clen - CRYPTO_ABYTES;
  /* mode -1 selects decryption in ascon_core */
  return ascon_core(m, c, *mlen, ad, adlen, npub, k, -1);
}
#include "ascon.h"

/*
 * Encrypt mlen bytes from m into c and report the output length
 * (mlen + CRYPTO_ABYTES) in *clen. Returns whatever ascon_core returns.
 * nsec is unused.
 */
int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen,
                        const unsigned char* m, unsigned long long mlen,
                        const unsigned char* ad, unsigned long long adlen,
                        const unsigned char* nsec, const unsigned char* npub,
                        const unsigned char* k) {
  (void)nsec;
  *clen = mlen + CRYPTO_ABYTES;
  /* mode 1 selects encryption in ascon_core */
  return ascon_core(c, m, mlen, ad, adlen, npub, k, 1);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment