diff --git a/ascon/Implementations/crypto_aead/ascon128av12/asm_esp32/api.h b/ascon/Implementations/crypto_aead/ascon128av12/asm_esp32/api.h new file mode 100644 index 0000000..a4aa567 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/asm_esp32/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 diff --git a/ascon/Implementations/crypto_aead/ascon128av12/asm_esp32/ascon.S b/ascon/Implementations/crypto_aead/ascon128av12/asm_esp32/ascon.S new file mode 100644 index 0000000..4dbdf15 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/asm_esp32/ascon.S @@ -0,0 +1,544 @@ +#include +#include "api.h" + +## REGISTER ALLOCATION +#define t0h a4 +#define t0l a5 +#define x0h a6 +#define x0l a7 +#define x1h a8 +#define x1l a9 +#define x2h a10 +#define x2l a11 +#define x3h a12 +#define x3l a13 +#define x4h a14 +#define x4l a15 +## OVERLAPPING REGISTER ALLOCATION +#define optr x2h +#define iptr x2l +#define ilen x3h +#define mode x3l +#define t1h x4h +#define t1l x4l + +## STACK FRAME LAYOUT +## +-----------+-----------+-----------+------------+-----------+ +## | ASCON128a | ASCON128 | ASCON80PQ | ASCONHASHa | ASCONHASH | +## | RATE 16 | RATE 8 | RATE 8 | RATE 8 | RATE 8 | +## | PA 12 | PA 12 | PA 12 | PA 12 | PA 12 | +## | PB 8 | PB 6 | PB 6 | PB 8 | PB 12 | +## | KEY 16 | KEY 16 | KEY 20 | | | +## +-----------+-----------+-----------+------------+-----------+ +## 0 | bytes | bytes | bytes | bytes | bytes | +## 4 | | | \---- | \---- | \---- | \---- | +## 8 | | | optr | optr | optr | optr | +## 12 | \---- | iptr | iptr | iptr cur | iptr cur | +## 16 | state x2h | state x2h | state x2h | | | +## 20 | | x2l | | x2l | | x2l | state x2l | state x2l | +## 24 | | x3h | | x3h | | x3h | \---- x3h | \---- x3h | +## 28 | | x3l | \---- x3l | \---- x3l | | | +## 32 | | x4h | ilen | ilen | ilen cur | ilen cur | +## 36 | \---- x4l | mode cur | mode cur | olen | olen | +## 40 | key k0h | key k0h | key k1h | | | +## 44 | | k0l | | k0l | | k1l | lr | lr | +## 48 | | k1h | | k1h | | k2h +------------+-----------+ +## 52 | \---- k1l | \---- k1l | | k2l | +## 56 | | | \---- k0h | +## 60 | optr cur | optr cur | optr cur | +## 64 | iptr cur | iptr cur | iptr cur | +## 68 | ilen cur | ilen cur | ilen cur | +## 72 | mode cur | lr2 | lr2 | +## 76 | optr | lr | lr | +## 80 | iptr +-----------+-----------+ +## 84 | ilen | | | +## 88 | lr2 | | | +## 92 | lr +-----------+-----------+ +## 96 +-----------+ kptr arg | kptr arg | +## 100 | | mode arg | mode arg | +## 104 | +-----------+-----------+ +## 108 +-----------+ +## 112 | kptr arg | +## 116 | mode arg | +## 120 +-----------+ + +## ASCON128a +#define RATE 16 +#define PA_ROUNDS 12 +#define PA_START_ROUND 0xf0 +#define PB_ROUNDS 8 +#define PB_START_ROUND 0xb4 +#define IVh (((8 * CRYPTO_KEYBYTES) << 24) | ((8 * RATE) << 16) | (PA_ROUNDS << 8) | (PB_ROUNDS << 0)) +#define IVl 0 + +#define S_state 16 +#define S_key 40 +#define S_optr_cur 60 +#define S_iptr_cur 64 +#define S_ilen_cur 68 +#define S_mode_cur 72 +#define S_optr 76 +#define S_iptr 80 +#define S_ilen 84 +#define S_lr2 88 +#define S_lr 92 +#define S_kptr_arg 112 +#define S_mode_arg 116 + +.macro sbox x0, x1, x2, x3, x4, t0, t1, t2 + xor \t2, \x3, \x4 + xor \t1, \x0, \x4 + movi \t0, -1 + xor \x4, \x4, \t0 + xor \t0, \x1, \x2 + or \x4, \x4, \x3 + xor \x4, \x4, \t0 + xor \x3, \x3, \x1 + or \x3, \x3, \t0 + xor \x3, \x3, \t1 + xor \x2, \x2, \t1 + or \x2, \x2, \x1 + xor \x2, \x2, \t2 + or \x0, \x0, \t2 + xor \t0, \t0, \x0 + and \t1, \t1, \x1 + xor \x1, \x1, \t1 + xor \x1, \x1, \t2 +.endm + +.macro linear dl, dh, sl, sh, sl0, sh0, r0, sl1, sh1, r1, t0 + ssai \r0 + src \dl, \sh0, \sl0 + src \dh, \sl0, \sh0 + xor \dl, \dl, \sl + xor \dh, \dh, \sh + ssai \r1 + src \t0, \sh1, \sl1 + src \sh, \sl1, \sh1 + xor \dl, \dl, \t0 + xor \dh, \dh, \sh +.endm + +.align 4 +.globl ascon_permute +.type ascon_permute,@function +ascon_permute: + # ascon permutation + # state in a6 .. a9 and sp + 16 .. sp + 36 + # start round in a2 + # temporaries in a3, a4, a5 + l32i x2h, a1, (S_state + 0) + l32i x2l, a1, (S_state + 4) + l32i x3h, a1, (S_state + 8) + l32i x3l, a1, (S_state + 12) + l32i x4h, a1, (S_state + 16) + l32i x4l, a1, (S_state + 20) +.align 4 +.globl ascon_permute_noload +.type ascon_permute_noload,@function +ascon_permute_noload: + # state in a6 .. a15 + # start round constant in a2 + # round count in a3 + # temporaries in a3, a4, a5 + + # ESP32 zero-overhead looping + floop a3, Ploop +.LPloop: + # round constant + xor x2l, x2l, a2 + + # s-box + sbox x0l, x1l, x2l, x3l, x4l, t0l, t0h, a3 + sbox x0h, x1h, x2h, x3h, x4h, t0h, x0l, a3 + + # linear layer + linear x0l, x0h, x2l, x2h, x2l, x2h, 19, x2l, x2h, 28, a3 + linear x2l, x2h, x4l, x4h, x4l, x4h, 1, x4l, x4h, 6, a3 + linear x4l, x4h, x1l, x1h, x1l, x1h, 7, x1h, x1l, 9, a3 + linear x1l, x1h, x3l, x3h, x3h, x3l, 29, x3h, x3l, 7, a3 + linear x3l, x3h, t0l, t0h, t0l, t0h, 10, t0l, t0h, 17, a3 + + # condition + addi a2, a2, -15 + + floopend a3, Ploop +.LPend: + s32i x2h, a1, (S_state + 0) + s32i x2l, a1, (S_state + 4) + s32i x3h, a1, (S_state + 8) + s32i x3l, a1, (S_state + 12) + s32i x4h, a1, (S_state + 16) + s32i x4l, a1, (S_state + 20) + ret + +.align 4 +.globl ascon_rev8 +.type ascon_rev8,@function +ascon_rev8: + # ascon bytereverse one block + # arguments and results in a4, a5, a14, a15 + # temporaries in a2 + ssai 8 + srli a2, t1h, 16 + src a2, a2, t1h + src a2, a2, a2 + src t1h, t1h, a2 + + srli a2, t1l, 16 + src a2, a2, t1l + src a2, a2, a2 + src t1l, t1l, a2 + + srli a2, t0h, 16 + src a2, a2, t0h + src a2, a2, a2 + src t0h, t0h, a2 + + srli a2, t0l, 16 + src a2, a2, t0l + src a2, a2, a2 + src t0l, t0l, a2 + + ret + +.align 4 +.globl ascon_memcpy +.type ascon_memcpy,@function +ascon_memcpy: + # memcpy that preserves registers used by ascon + # dest in a2 + # src in a3 + # temporaries in a4, a5 + movi a4, 0 + j .LMcond +.LMloop: + l8ui a5, a3, 0 + s8i a5, a2, 0 + addi a2, a2, 1 + addi a3, a3, 1 + addi a4, a4, 1 +.LMcond: + bltu a4, ilen, .LMloop +.LMend: + ret + +.align 4 +.globl ascon_duplex +.type ascon_duplex,@function +ascon_duplex: + s32i a0, a1, S_lr2 + j .LDcond + +.LDloop: + l32i t0h, iptr, 0 + l32i t0l, iptr, 4 + l32i t1h, iptr, 8 + l32i t1l, iptr, 12 + call0 ascon_rev8 + xor x0h, x0h, t0h + xor x0l, x0l, t0l + xor x1h, x1h, t1h + xor x1l, x1l, t1l + +.LDsqueeze: + beqz a13, .LDreset + + # ascon_rev8 + # inlined here to preserve registers + ssai 8 + srli a2, x0h, 16 + src a2, a2, x0h + src a2, a2, a2 + src a2, x0h, a2 + s32i a2, optr, 0 + + srli a2, x0l, 16 + src a2, a2, x0l + src a2, a2, a2 + src a2, x0l, a2 + s32i a2, optr, 4 + + srli a2, x1h, 16 + src a2, a2, x1h + src a2, a2, a2 + src a2, x1h, a2 + s32i a2, optr, 8 + + srli a2, x1l, 16 + src a2, a2, x1l + src a2, a2, a2 + src a2, x1l, a2 + s32i a2, optr, 12 + +.LDreset: + bgez mode, .LDpermute + mov x0h, t0h + mov x0l, t0l + mov x1h, t1h + mov x1l, t1l + +.LDpermute: + s32i optr, a1, S_optr_cur + s32i iptr, a1, S_iptr_cur + s32i ilen, a1, S_ilen_cur + movi a2, PB_START_ROUND + movi a3, PB_ROUNDS + call0 ascon_permute + l32i optr, a1, S_optr_cur + l32i iptr, a1, S_iptr_cur + l32i ilen, a1, S_ilen_cur + l32i mode, a1, S_mode_cur + + addi optr, optr, RATE + addi iptr, iptr, RATE + addi ilen, ilen, -RATE + +.LDcond: + bgeui ilen, RATE, .LDloop + +.LDend: + movi a2, 0 + s32i a2, a1, 0 + s32i a2, a1, 4 + s32i a2, a1, 8 + s32i a2, a1, 12 + + mov a2, a1 + mov a3, iptr + call0 ascon_memcpy + + movi a4, 0x80 + add a2, a1, ilen + l8ui a3, a2, 0 + xor a3, a3, a4 + s8i a3, a2, 0 + + l32i t0h, a1, 0 + l32i t0l, a1, 4 + l32i t1h, a1, 8 + l32i t1l, a1, 12 + call0 ascon_rev8 + xor x0h, x0h, t0h + xor x0l, x0l, t0l + xor x1h, x1h, t1h + xor x1l, x1l, t1l + +.LDendsqueeze: + beqz mode, .LDendreset + + mov t0h, x0h + mov t0l, x0l + mov t1h, x1h + mov t1l, x1l + call0 ascon_rev8 + s32i t0h, a1, 0 + s32i t0l, a1, 4 + s32i t1h, a1, 8 + s32i t1l, a1, 12 + + mov a2, optr + mov a3, a1 + call0 ascon_memcpy + +.LDendreset: + bgez mode, .LDreturn + + mov a2, a1 + mov a3, iptr + call0 ascon_memcpy + + l32i t0h, a1, 0 + l32i t0l, a1, 4 + l32i t1h, a1, 8 + l32i t1l, a1, 12 + call0 ascon_rev8 + mov x0h, t0h + mov x0l, t0l + mov x1h, t1h + mov x1l, t1l + +.LDreturn: + add optr, optr, ilen + add iptr, iptr, ilen + l32i a0, a1, S_lr2 + ret + +.align 4 +.globl ascon_core +.type ascon_core,@function +ascon_core: + abi_entry 96, 4 + s32i a0, a1, S_lr + s32i a2, a1, S_optr + s32i a3, a1, S_iptr + s32i a4, a1, S_ilen + s32i a5, a1, S_iptr_cur + s32i a6, a1, S_ilen_cur + + # load key + l32i a2, a1, S_kptr_arg + l32i t0h, a2, 0 + l32i t0l, a2, 4 + l32i t1h, a2, 8 + l32i t1l, a2, 12 + call0 ascon_rev8 + s32i t0h, a1, (S_key + 0) + s32i t0l, a1, (S_key + 4) + s32i t1h, a1, (S_key + 8) + s32i t1l, a1, (S_key + 12) + mov x1h, t0h + mov x1l, t0l + mov x2h, t1h + mov x2l, t1l + + # load nonce + # a7 is not clobbered by ascon_rev8 + # a7 does not overlap x1, x2, t0, or t1 + # x4 overlaps t1, move unnecessary + mov a2, a7 + l32i t0h, a2, 0 + l32i t0l, a2, 4 + l32i t1h, a2, 8 + l32i t1l, a2, 12 + call0 ascon_rev8 + mov x3h, t0h + mov x3l, t0l + + # load IV + # this clobbers a7 + movi x0h, IVh + movi x0l, IVl + + movi a2, PA_START_ROUND + movi a3, PA_ROUNDS + call0 ascon_permute_noload + + # xor key + # x4 overlaps t1, do in two steps + l32i t0h, a1, (S_key + 0) + l32i t0l, a1, (S_key + 4) + xor x3h, x3h, t0h + xor x3l, x3l, t0l + l32i t0h, a1, (S_key + 8) + l32i t0l, a1, (S_key + 12) + xor x4h, x4h, t0h + xor x4l, x4l, t0l + + # save state + s32i x2h, a1, (S_state + 0) + s32i x2l, a1, (S_state + 4) + s32i x3h, a1, (S_state + 8) + s32i x3l, a1, (S_state + 12) + s32i x4h, a1, (S_state + 16) + s32i x4l, a1, (S_state + 20) + + l32i ilen, a1, S_ilen_cur + beqz ilen, .LCskipad + + l32i iptr, a1, S_iptr_cur + movi mode, 0 + s32i mode, a1, S_mode_cur + call0 ascon_duplex + + movi a2, PB_START_ROUND + movi a3, PB_ROUNDS + call0 ascon_permute + +.LCskipad: + movi a2, 1 + xor x4l, x4l, a2 + s32i x4l, a1, (S_state + 20) + + l32i optr, a1, S_optr + l32i iptr, a1, S_iptr + l32i ilen, a1, S_ilen + l8ui mode, a1, S_mode_arg + sext mode, mode, 7 + s32i mode, a1, S_mode_cur + call0 ascon_duplex + s32i optr, a1, S_optr_cur + s32i iptr, a1, S_iptr_cur + + # restore state + l32i x2h, a1, (S_state + 0) + l32i x2l, a1, (S_state + 4) + l32i x3h, a1, (S_state + 8) + l32i x3l, a1, (S_state + 12) + l32i x4h, a1, (S_state + 16) + l32i x4l, a1, (S_state + 20) + + # xor key + # x4 overlaps t1, do in two steps + l32i t0h, a1, (S_key + 0) + l32i t0l, a1, (S_key + 4) + xor x2h, x2h, t0h + xor x2l, x2l, t0l + l32i t0h, a1, (S_key + 8) + l32i t0l, a1, (S_key + 12) + xor x3h, x3h, t0h + xor x3l, x3l, t0l + + movi a2, PA_START_ROUND + movi a3, PA_ROUNDS + call0 ascon_permute_noload + + # xor key + # x4 overlaps t1, do in two steps + l32i t0h, a1, (S_key + 0) + l32i t0l, a1, (S_key + 4) + xor x3h, x3h, t0h + xor x3l, x3l, t0l + l32i t0h, a1, (S_key + 8) + l32i t0l, a1, (S_key + 12) + xor x4h, x4h, t0h + xor x4l, x4l, t0l + + l32i a2, a1, S_mode_cur + bgez a2, .LCencrypt +.LCdecrypt: + + # save x4 into x0 + # x0 is no longer needed + # x4 overlaps t1 + mov x0h, x4h + mov x0l, x4l + + l32i a2, a1, S_iptr_cur + l32i t0h, a2, 0 + l32i t0l, a2, 4 + l32i t1h, a2, 8 + l32i t1l, a2, 12 + call0 ascon_rev8 + + # check tag + # x4 is in x0 + xor a2, x3h, t0h + xor a3, x3l, t0l + xor a2, a2, a3 + xor a3, x0h, t1h + xor a2, a2, a3 + xor a3, x0l, t1l + xor a2, a2, a3 + + beqz a2, .LCzeroreturn + movi a2, -1 + j .LCreturn +.LCencrypt: + + # store tag + # x4 overlaps t1, move unnecessary + mov t0h, x3h + mov t0l, x3l + call0 ascon_rev8 + l32i a2, a1, S_optr_cur + s32i t0h, a2, 0 + s32i t0l, a2, 4 + s32i t1h, a2, 8 + s32i t1l, a2, 12 + +.LCzeroreturn: + movi a2, 0 +.LCreturn: + l32i a0, a1, S_lr + abi_return diff --git a/ascon/Implementations/crypto_aead/ascon128av12/asm_esp32/ascon.h b/ascon/Implementations/crypto_aead/ascon128av12/asm_esp32/ascon.h new file mode 100644 index 0000000..88f4da3 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/asm_esp32/ascon.h @@ -0,0 +1,7 @@ +#include "api.h" + +int ascon_core(unsigned char * outptr, + const unsigned char * inptr, unsigned int inlen, + const unsigned char * adptr, unsigned int adlen, + const unsigned char * nptr, const unsigned char * kptr, + unsigned char mode); diff --git a/ascon/Implementations/crypto_aead/ascon128av12/asm_esp32/decrypt.c b/ascon/Implementations/crypto_aead/ascon128av12/asm_esp32/decrypt.c new file mode 100644 index 0000000..4ee4528 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/asm_esp32/decrypt.c @@ -0,0 +1,17 @@ +#include "ascon.h" + +int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen, + unsigned char* nsec, const unsigned char* c, + unsigned long long clen, const unsigned char* ad, + unsigned long long adlen, const unsigned char* npub, + const unsigned char* k) { + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + + *mlen = clen - CRYPTO_ABYTES; + (void)nsec; + + return ascon_core(m, c, *mlen, ad, adlen, npub, k, -1); +} diff --git a/ascon/Implementations/crypto_aead/ascon128av12/asm_esp32/encrypt.c b/ascon/Implementations/crypto_aead/ascon128av12/asm_esp32/encrypt.c new file mode 100644 index 0000000..a6f95db --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/asm_esp32/encrypt.c @@ -0,0 +1,13 @@ +#include "ascon.h" + +int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen, + const unsigned char* m, unsigned long long mlen, + const unsigned char* ad, unsigned long long adlen, + const unsigned char* nsec, const unsigned char* npub, + const unsigned char* k) { + + *clen = mlen + CRYPTO_ABYTES; + (void)nsec; + + return ascon_core(c, m, mlen, ad, adlen, npub, k, 1); +} diff --git a/ascon/Implementations/crypto_aead/ascon128av12/asm_esp32/implementors b/ascon/Implementations/crypto_aead/ascon128av12/asm_esp32/implementors new file mode 100644 index 0000000..1b9a187 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/asm_esp32/implementors @@ -0,0 +1 @@ +Ferdinand Bachmann diff --git a/ascon/Implementations/crypto_aead/ascon128v12/asm_esp32/api.h b/ascon/Implementations/crypto_aead/ascon128v12/asm_esp32/api.h new file mode 100644 index 0000000..a4aa567 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/asm_esp32/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 diff --git a/ascon/Implementations/crypto_aead/ascon128v12/asm_esp32/ascon.S b/ascon/Implementations/crypto_aead/ascon128v12/asm_esp32/ascon.S new file mode 100644 index 0000000..009638e --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/asm_esp32/ascon.S @@ -0,0 +1,508 @@ +#include +#include "api.h" + +## REGISTER ALLOCATION +#define t0h a4 +#define t0l a5 +#define x0h a6 +#define x0l a7 +#define x1h a8 +#define x1l a9 +#define x2h a10 +#define x2l a11 +#define x3h a12 +#define x3l a13 +#define x4h a14 +#define x4l a15 +## OVERLAPPING REGISTER ALLOCATION +#define optr x2h +#define iptr x2l +#define ilen x3h +#define mode x3l +#define t1h x4h +#define t1l x4l + +## STACK FRAME LAYOUT +## +-----------+-----------+-----------+------------+-----------+ +## | ASCON128a | ASCON128 | ASCON80PQ | ASCONHASHa | ASCONHASH | +## | RATE 16 | RATE 8 | RATE 8 | RATE 8 | RATE 8 | +## | PA 12 | PA 12 | PA 12 | PA 12 | PA 12 | +## | PB 8 | PB 6 | PB 6 | PB 8 | PB 12 | +## | KEY 16 | KEY 16 | KEY 20 | | | +## +-----------+-----------+-----------+------------+-----------+ +## 0 | bytes | bytes | bytes | bytes | bytes | +## 4 | | | \---- | \---- | \---- | \---- | +## 8 | | | optr | optr | optr | optr | +## 12 | \---- | iptr | iptr | iptr cur | iptr cur | +## 16 | state x2h | state x2h | state x2h | | | +## 20 | | x2l | | x2l | | x2l | state x2l | state x2l | +## 24 | | x3h | | x3h | | x3h | \---- x3h | \---- x3h | +## 28 | | x3l | \---- x3l | \---- x3l | | | +## 32 | | x4h | ilen | ilen | ilen cur | ilen cur | +## 36 | \---- x4l | mode cur | mode cur | olen | olen | +## 40 | key k0h | key k0h | key k1h | | | +## 44 | | k0l | | k0l | | k1l | lr | lr | +## 48 | | k1h | | k1h | | k2h +------------+-----------+ +## 52 | \---- k1l | \---- k1l | | k2l | +## 56 | | | \---- k0h | +## 60 | optr cur | optr cur | optr cur | +## 64 | iptr cur | iptr cur | iptr cur | +## 68 | ilen cur | ilen cur | ilen cur | +## 72 | mode cur | lr2 | lr2 | +## 76 | optr | lr | lr | +## 80 | iptr +-----------+-----------+ +## 84 | ilen | | | +## 88 | lr2 | | | +## 92 | lr +-----------+-----------+ +## 96 +-----------+ kptr arg | kptr arg | +## 100 | | mode arg | mode arg | +## 104 | +-----------+-----------+ +## 108 +-----------+ +## 112 | kptr arg | +## 116 | mode arg | +## 120 +-----------+ + +## ASCON128 +#define RATE 8 +#define PA_ROUNDS 12 +#define PA_START_ROUND 0xf0 +#define PB_ROUNDS 6 +#define PB_START_ROUND 0x96 +#define IVh (((8 * CRYPTO_KEYBYTES) << 24) | ((8 * RATE) << 16) | (PA_ROUNDS << 8) | (PB_ROUNDS << 0)) +#define IVl 0 + +#define S_state 16 +#define S_key 40 +#define S_optr_cur 60 +#define S_iptr_cur 64 +#define S_ilen_cur 68 +#define S_mode_cur 36 +#define S_optr 8 +#define S_iptr 12 +#define S_ilen 32 +#define S_lr2 72 +#define S_lr 76 +#define S_kptr_arg 96 +#define S_mode_arg 100 + +.macro sbox x0, x1, x2, x3, x4, r0, t0, t1, t2 + xor \t1, \x0, \x4 + xor \t2, \x3, \x4 + movi \t0, -1 + xor \x4, \x4, \t0 + xor \t0, \x1, \x2 + or \x4, \x4, \x3 + xor \x4, \x4, \t0 + xor \x3, \x3, \x1 + or \x3, \x3, \t0 + xor \x3, \x3, \t1 + xor \x2, \x2, \t1 + or \x2, \x2, \x1 + xor \x2, \x2, \t2 + or \x0, \x0, \t2 + xor \x0, \x0, \t0 + movi \t0, -1 + xor \t1, \t1, \t0 + and \x1, \x1, \t1 + xor \x1, \x1, \t2 + mov \r0, \x0 +.endm + +.macro linear dl, dh, sl, sh, sl0, sh0, r0, sl1, sh1, r1, t0 + ssai \r0 + src \dl, \sh0, \sl0 + src \dh, \sl0, \sh0 + xor \dl, \dl, \sl + xor \dh, \dh, \sh + ssai \r1 + src \t0, \sh1, \sl1 + src \sh, \sl1, \sh1 + xor \dl, \dl, \t0 + xor \dh, \dh, \sh +.endm + +.align 4 +.globl ascon_permute +.type ascon_permute,@function +ascon_permute: + # ascon permutation + # state in a6 .. a9 and sp + 16 .. sp + 36 + # start round in a2 + # temporaries in a3, a4, a5 + l32i x2h, a1, (S_state + 0) + l32i x2l, a1, (S_state + 4) + l32i x3h, a1, (S_state + 8) + l32i x3l, a1, (S_state + 12) +.globl ascon_permute_noload +.type ascon_permute_noload,@function +ascon_permute_noload: + # state in a6 .. a15 + # start round constant in a2 + # round count in a3 + # temporaries in a3, a4, a5 + + # ESP32 zero-overhead looping + floop a3, Ploop +.LPloop: + # round constant + xor x2l, x2l, a2 + + # s-box + sbox x0l, x1l, x2l, x3l, x4l, t0l, t0h, t0l, a3 + sbox x0h, x1h, x2h, x3h, x4h, t0h, t0h, x0l, a3 + + # linear layer + linear x0l, x0h, x2l, x2h, x2l, x2h, 19, x2l, x2h, 28, a3 + linear x2l, x2h, x4l, x4h, x4l, x4h, 1, x4l, x4h, 6, a3 + linear x4l, x4h, x1l, x1h, x1l, x1h, 7, x1h, x1l, 9, a3 + linear x1l, x1h, x3l, x3h, x3h, x3l, 29, x3h, x3l, 7, a3 + linear x3l, x3h, t0l, t0h, t0l, t0h, 10, t0l, t0h, 17, a3 + + # condition + addi a2, a2, -15 + + floopend a3, Ploop +.LPend: + s32i x2h, a1, (S_state + 0) + s32i x2l, a1, (S_state + 4) + s32i x3h, a1, (S_state + 8) + s32i x3l, a1, (S_state + 12) + ret + +.align 4 +.globl ascon_rev8 +.type ascon_rev8,@function +ascon_rev8: + # ascon bytereverse one block + # arguments and results in a4, a5, a14, a15 + # temporaries in a2 + ssai 8 + srli a2, t1h, 16 + src a2, a2, t1h + src a2, a2, a2 + src t1h, t1h, a2 + + srli a2, t1l, 16 + src a2, a2, t1l + src a2, a2, a2 + src t1l, t1l, a2 + +.globl ascon_rev8_half +.type ascon_rev8_half,@function +ascon_rev8_half: + ssai 8 + srli a2, t0h, 16 + src a2, a2, t0h + src a2, a2, a2 + src t0h, t0h, a2 + + srli a2, t0l, 16 + src a2, a2, t0l + src a2, a2, a2 + src t0l, t0l, a2 + + ret + +.align 4 +.globl ascon_memcpy +.type ascon_memcpy,@function +ascon_memcpy: + # memcpy that preserves registers used by ascon + # dest in a2 + # src in a3 + # temporaries in a4, a5 + movi a4, 0 + j .LMcond +.LMloop: + l8ui a5, a3, 0 + s8i a5, a2, 0 + addi a2, a2, 1 + addi a3, a3, 1 + addi a4, a4, 1 +.LMcond: + bltu a4, ilen, .LMloop +.LMend: + ret + +.align 4 +.globl ascon_duplex +.type ascon_duplex,@function +ascon_duplex: + s32i a0, a1, S_lr2 + j .LDcond + +.LDloop: + l32i t0h, iptr, 0 + l32i t0l, iptr, 4 + call0 ascon_rev8_half + xor x0h, x0h, t0h + xor x0l, x0l, t0l + +.LDsqueeze: + beqz a13, .LDreset + + # ascon_rev8 + # inlined here to preserve registers + ssai 8 + srli a2, x0h, 16 + src a2, a2, x0h + src a2, a2, a2 + src a2, x0h, a2 + s32i a2, optr, 0 + + srli a2, x0l, 16 + src a2, a2, x0l + src a2, a2, a2 + src a2, x0l, a2 + s32i a2, optr, 4 + +.LDreset: + bgez mode, .LDpermute + mov x0h, t0h + mov x0l, t0l + +.LDpermute: + s32i optr, a1, S_optr_cur + s32i iptr, a1, S_iptr_cur + s32i ilen, a1, S_ilen_cur + movi a2, PB_START_ROUND + movi a3, PB_ROUNDS + call0 ascon_permute + l32i optr, a1, S_optr_cur + l32i iptr, a1, S_iptr_cur + l32i ilen, a1, S_ilen_cur + l32i mode, a1, S_mode_cur + + addi optr, optr, RATE + addi iptr, iptr, RATE + addi ilen, ilen, -RATE + +.LDcond: + bgeui ilen, RATE, .LDloop + +.LDend: + movi a2, 0 + s32i a2, a1, 0 + s32i a2, a1, 4 + + mov a2, a1 + mov a3, iptr + call0 ascon_memcpy + + movi a4, 0x80 + add a2, a1, ilen + l8ui a3, a2, 0 + xor a3, a3, a4 + s8i a3, a2, 0 + + l32i t0h, a1, 0 + l32i t0l, a1, 4 + call0 ascon_rev8_half + xor x0h, x0h, t0h + xor x0l, x0l, t0l + +.LDendsqueeze: + beqz mode, .LDendreset + + mov t0h, x0h + mov t0l, x0l + call0 ascon_rev8_half + s32i t0h, a1, 0 + s32i t0l, a1, 4 + + mov a2, optr + mov a3, a1 + call0 ascon_memcpy + +.LDendreset: + bgez mode, .LDreturn + + mov a2, a1 + mov a3, iptr + call0 ascon_memcpy + + l32i t0h, a1, 0 + l32i t0l, a1, 4 + call0 ascon_rev8_half + mov x0h, t0h + mov x0l, t0l + +.LDreturn: + add optr, optr, ilen + add iptr, iptr, ilen + l32i a0, a1, S_lr2 + ret + +.align 4 +.globl ascon_core +.type ascon_core,@function +ascon_core: + abi_entry 80, 4 + s32i a0, a1, S_lr + s32i a2, a1, S_optr + s32i a3, a1, S_iptr + s32i a4, a1, S_ilen + s32i a5, a1, S_iptr_cur + s32i a6, a1, S_ilen_cur + + # load key + l32i a2, a1, S_kptr_arg + l32i t0h, a2, 0 + l32i t0l, a2, 4 + l32i t1h, a2, 8 + l32i t1l, a2, 12 + call0 ascon_rev8 + s32i t0h, a1, (S_key + 0) + s32i t0l, a1, (S_key + 4) + s32i t1h, a1, (S_key + 8) + s32i t1l, a1, (S_key + 12) + mov x1h, t0h + mov x1l, t0l + mov x2h, t1h + mov x2l, t1l + + # load nonce + # a7 is not clobbered by ascon_rev8 + # a7 does not overlap x1, x2, t0, or t1 + # x4 overlaps t1, move unnecessary + mov a2, a7 + l32i t0h, a2, 0 + l32i t0l, a2, 4 + l32i t1h, a2, 8 + l32i t1l, a2, 12 + call0 ascon_rev8 + mov x3h, t0h + mov x3l, t0l + + # load IV + # this clobbers a7 + movi x0h, IVh + movi x0l, IVl + + movi a2, PA_START_ROUND + movi a3, PA_ROUNDS + call0 ascon_permute_noload + + # xor key + # x4 overlaps t1, do in two steps + l32i t0h, a1, (S_key + 0) + l32i t0l, a1, (S_key + 4) + xor x3h, x3h, t0h + xor x3l, x3l, t0l + l32i t0h, a1, (S_key + 8) + l32i t0l, a1, (S_key + 12) + xor x4h, x4h, t0h + xor x4l, x4l, t0l + + # save state + s32i x2h, a1, (S_state + 0) + s32i x2l, a1, (S_state + 4) + s32i x3h, a1, (S_state + 8) + s32i x3l, a1, (S_state + 12) + + l32i ilen, a1, S_ilen_cur + beqz ilen, .LCskipad + + l32i iptr, a1, S_iptr_cur + movi mode, 0 + s32i mode, a1, S_mode_cur + call0 ascon_duplex + + movi a2, PB_START_ROUND + movi a3, PB_ROUNDS + call0 ascon_permute + +.LCskipad: + movi a2, 1 + xor x4l, x4l, a2 + + l32i optr, a1, S_optr + l32i iptr, a1, S_iptr + l32i ilen, a1, S_ilen + l8ui mode, a1, S_mode_arg + sext mode, mode, 7 + s32i mode, a1, S_mode_cur + call0 ascon_duplex + s32i optr, a1, S_optr_cur + s32i iptr, a1, S_iptr_cur + + # restore state + l32i x2h, a1, (S_state + 0) + l32i x2l, a1, (S_state + 4) + l32i x3h, a1, (S_state + 8) + l32i x3l, a1, (S_state + 12) + + # xor key + # x4 overlaps t1, do in two steps + l32i t0h, a1, (S_key + 0) + l32i t0l, a1, (S_key + 4) + xor x1h, x1h, t0h + xor x1l, x1l, t0l + l32i t0h, a1, (S_key + 8) + l32i t0l, a1, (S_key + 12) + xor x2h, x2h, t0h + xor x2l, x2l, t0l + + movi a2, PA_START_ROUND + movi a3, PA_ROUNDS + call0 ascon_permute_noload + + # xor key + # x4 overlaps t1, do in two steps + l32i t0h, a1, (S_key + 0) + l32i t0l, a1, (S_key + 4) + xor x3h, x3h, t0h + xor x3l, x3l, t0l + l32i t0h, a1, (S_key + 8) + l32i t0l, a1, (S_key + 12) + xor x4h, x4h, t0h + xor x4l, x4l, t0l + + l32i a2, a1, S_mode_cur + bgez a2, .LCencrypt +.LCdecrypt: + + # save x4 into x0 + # x0 is no longer needed + # x4 overlaps t1 + mov x0h, x4h + mov x0l, x4l + + l32i a2, a1, S_iptr_cur + l32i t0h, a2, 0 + l32i t0l, a2, 4 + l32i t1h, a2, 8 + l32i t1l, a2, 12 + call0 ascon_rev8 + + # check tag + # x4 is in x0 + xor a2, x3h, t0h + xor a3, x3l, t0l + xor a2, a2, a3 + xor a3, x0h, t1h + xor a2, a2, a3 + xor a3, x0l, t1l + xor a2, a2, a3 + + beqz a2, .LCzeroreturn + movi a2, -1 + j .LCreturn +.LCencrypt: + + # store tag + # x4 overlaps t1, move unnecessary + mov t0h, x3h + mov t0l, x3l + call0 ascon_rev8 + l32i a2, a1, S_optr_cur + s32i t0h, a2, 0 + s32i t0l, a2, 4 + s32i t1h, a2, 8 + s32i t1l, a2, 12 + +.LCzeroreturn: + movi a2, 0 +.LCreturn: + l32i a0, a1, S_lr + abi_return diff --git a/ascon/Implementations/crypto_aead/ascon128v12/asm_esp32/ascon.h b/ascon/Implementations/crypto_aead/ascon128v12/asm_esp32/ascon.h new file mode 100644 index 0000000..88f4da3 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/asm_esp32/ascon.h @@ -0,0 +1,7 @@ +#include "api.h" + +int ascon_core(unsigned char * outptr, + const unsigned char * inptr, unsigned int inlen, + const unsigned char * adptr, unsigned int adlen, + const unsigned char * nptr, const unsigned char * kptr, + unsigned char mode); diff --git a/ascon/Implementations/crypto_aead/ascon128v12/asm_esp32/decrypt.c b/ascon/Implementations/crypto_aead/ascon128v12/asm_esp32/decrypt.c new file mode 100644 index 0000000..4ee4528 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/asm_esp32/decrypt.c @@ -0,0 +1,17 @@ +#include "ascon.h" + +int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen, + unsigned char* nsec, const unsigned char* c, + unsigned long long clen, const unsigned char* ad, + unsigned long long adlen, const unsigned char* npub, + const unsigned char* k) { + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + + *mlen = clen - CRYPTO_ABYTES; + (void)nsec; + + return ascon_core(m, c, *mlen, ad, adlen, npub, k, -1); +} diff --git a/ascon/Implementations/crypto_aead/ascon128v12/asm_esp32/encrypt.c b/ascon/Implementations/crypto_aead/ascon128v12/asm_esp32/encrypt.c new file mode 100644 index 0000000..a6f95db --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/asm_esp32/encrypt.c @@ -0,0 +1,13 @@ +#include "ascon.h" + +int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen, + const unsigned char* m, unsigned long long mlen, + const unsigned char* ad, unsigned long long adlen, + const unsigned char* nsec, const unsigned char* npub, + const unsigned char* k) { + + *clen = mlen + CRYPTO_ABYTES; + (void)nsec; + + return ascon_core(c, m, mlen, ad, adlen, npub, k, 1); +} diff --git a/ascon/Implementations/crypto_aead/ascon128v12/asm_esp32/implementors b/ascon/Implementations/crypto_aead/ascon128v12/asm_esp32/implementors new file mode 100644 index 0000000..1b9a187 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/asm_esp32/implementors @@ -0,0 +1 @@ +Ferdinand Bachmann diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/asm_esp32/api.h b/ascon/Implementations/crypto_aead/ascon80pqv12/asm_esp32/api.h new file mode 100644 index 0000000..4b53d6c --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/asm_esp32/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 20 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/asm_esp32/ascon.S b/ascon/Implementations/crypto_aead/ascon80pqv12/asm_esp32/ascon.S new file mode 100644 index 0000000..d23cfb7 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/asm_esp32/ascon.S @@ -0,0 +1,522 @@ +#include +#include "api.h" + +## REGISTER ALLOCATION +#define t0h a4 +#define t0l a5 +#define x0h a6 +#define x0l a7 +#define x1h a8 +#define x1l a9 +#define x2h a10 +#define x2l a11 +#define x3h a12 +#define x3l a13 +#define x4h a14 +#define x4l a15 +## OVERLAPPING REGISTER ALLOCATION +#define optr x2h +#define iptr x2l +#define ilen x3h +#define mode x3l +#define t1h x4h +#define t1l x4l + +## STACK FRAME LAYOUT +## +-----------+-----------+-----------+------------+-----------+ +## | ASCON128a | ASCON128 | ASCON80PQ | ASCONHASHa | ASCONHASH | +## | RATE 16 | RATE 8 | RATE 8 | RATE 8 | RATE 8 | +## | PA 12 | PA 12 | PA 12 | PA 12 | PA 12 | +## | PB 8 | PB 6 | PB 6 | PB 8 | PB 12 | +## | KEY 16 | KEY 16 | KEY 20 | | | +## +-----------+-----------+-----------+------------+-----------+ +## 0 | bytes | bytes | bytes | bytes | bytes | +## 4 | | | \---- | \---- | \---- | \---- | +## 8 | | | optr | optr | optr | optr | +## 12 | \---- | iptr | iptr | iptr cur | iptr cur | +## 16 | state x2h | state x2h | state x2h | | | +## 20 | | x2l | | x2l | | x2l | state x2l | state x2l | +## 24 | | x3h | | x3h | | x3h | \---- x3h | \---- x3h | +## 28 | | x3l | \---- x3l | \---- x3l | | | +## 32 | | x4h | ilen | ilen | ilen cur | ilen cur | +## 36 | \---- x4l | mode cur | mode cur | olen | olen | +## 40 | key k0h | key k0h | key k1h | | | +## 44 | | k0l | | k0l | | k1l | lr | lr | +## 48 | | k1h | | k1h | | k2h +------------+-----------+ +## 52 | \---- k1l | \---- k1l | | k2l | +## 56 | | | \---- k0h | +## 60 | optr cur | optr cur | optr cur | +## 64 | iptr cur | iptr cur | iptr cur | +## 68 | ilen cur | ilen cur | ilen cur | +## 72 | mode cur | lr2 | lr2 | +## 76 | optr | lr | lr | +## 80 | iptr +-----------+-----------+ +## 84 | ilen | | | +## 88 | lr2 | | | +## 92 | lr +-----------+-----------+ +## 96 +-----------+ kptr arg | kptr arg | +## 100 | | mode arg | mode arg | +## 104 | +-----------+-----------+ +## 108 +-----------+ +## 112 | kptr arg | +## 116 | mode arg | +## 120 +-----------+ + +## ASCON80PQ +#define RATE 8 +#define PA_ROUNDS 12 +#define PA_START_ROUND 0xf0 +#define PB_ROUNDS 6 +#define PB_START_ROUND 0x96 +#define IVh (((8 * CRYPTO_KEYBYTES) << 24) | ((8 * RATE) << 16) | (PA_ROUNDS << 8) | (PB_ROUNDS << 0)) +#define IVl 0 + +#define S_state 16 +#define S_key 40 +#define S_optr_cur 60 +#define S_iptr_cur 64 +#define S_ilen_cur 68 +#define S_mode_cur 36 +#define S_optr 8 +#define S_iptr 12 +#define S_ilen 32 +#define S_lr2 72 +#define S_lr 76 +#define S_kptr_arg 96 +#define S_mode_arg 100 + +.macro sbox x0, x1, x2, x3, x4, r0, t0, t1, t2 + xor \t1, \x0, \x4 + xor \t2, \x3, \x4 + movi \t0, -1 + xor \x4, \x4, \t0 + xor \t0, \x1, \x2 + or \x4, \x4, \x3 + xor \x4, \x4, \t0 + xor \x3, \x3, \x1 + or \x3, \x3, \t0 + xor \x3, \x3, \t1 + xor \x2, \x2, \t1 + or \x2, \x2, \x1 + xor \x2, \x2, \t2 + or \x0, \x0, \t2 + xor \x0, \x0, \t0 + movi \t0, -1 + xor \t1, \t1, \t0 + and \x1, \x1, \t1 + xor \x1, \x1, \t2 + mov \r0, \x0 +.endm + +.macro linear dl, dh, sl, sh, sl0, sh0, r0, sl1, sh1, r1, t0 + ssai \r0 + src \dl, \sh0, \sl0 + src \dh, \sl0, \sh0 + xor \dl, \dl, \sl + xor \dh, \dh, \sh + ssai \r1 + src \t0, \sh1, \sl1 + src \sh, \sl1, \sh1 + xor \dl, \dl, \t0 + xor \dh, \dh, \sh +.endm + +.align 4 +.globl ascon_permute +.type ascon_permute,@function +ascon_permute: + # ascon permutation + # state in a6 .. a9 and sp + 16 .. sp + 36 + # start round in a2 + # temporaries in a3, a4, a5 + l32i x2h, a1, (S_state + 0) + l32i x2l, a1, (S_state + 4) + l32i x3h, a1, (S_state + 8) + l32i x3l, a1, (S_state + 12) +.globl ascon_permute_noload +.type ascon_permute_noload,@function +ascon_permute_noload: + # state in a6 .. a15 + # start round constant in a2 + # round count in a3 + # temporaries in a3, a4, a5 + + # ESP32 zero-overhead looping + floop a3, Ploop +.LPloop: + # round constant + xor x2l, x2l, a2 + + # s-box + sbox x0l, x1l, x2l, x3l, x4l, t0l, t0h, t0l, a3 + sbox x0h, x1h, x2h, x3h, x4h, t0h, t0h, x0l, a3 + + # linear layer + linear x0l, x0h, x2l, x2h, x2l, x2h, 19, x2l, x2h, 28, a3 + linear x2l, x2h, x4l, x4h, x4l, x4h, 1, x4l, x4h, 6, a3 + linear x4l, x4h, x1l, x1h, x1l, x1h, 7, x1h, x1l, 9, a3 + linear x1l, x1h, x3l, x3h, x3h, x3l, 29, x3h, x3l, 7, a3 + linear x3l, x3h, t0l, t0h, t0l, t0h, 10, t0l, t0h, 17, a3 + + # condition + addi a2, a2, -15 + + floopend a3, Ploop +.LPend: + s32i x2h, a1, (S_state + 0) + s32i x2l, a1, (S_state + 4) + s32i x3h, a1, (S_state + 8) + s32i x3l, a1, (S_state + 12) + ret + +.align 4 +.globl ascon_rev8 +.type ascon_rev8,@function +ascon_rev8: + # ascon bytereverse one block + # arguments and results in a4, a5, a14, a15 + # temporaries in a2 + ssai 8 + srli a2, t1h, 16 + src a2, a2, t1h + src a2, a2, a2 + src t1h, t1h, a2 + + srli a2, t1l, 16 + src a2, a2, t1l + src a2, a2, a2 + src t1l, t1l, a2 + +.globl ascon_rev8_half +.type ascon_rev8_half,@function +ascon_rev8_half: + ssai 8 + srli a2, t0h, 16 + src a2, a2, t0h + src a2, a2, a2 + src t0h, t0h, a2 + + srli a2, t0l, 16 + src a2, a2, t0l + src a2, a2, a2 + src t0l, t0l, a2 + + ret + +.align 4 +.globl ascon_memcpy +.type ascon_memcpy,@function +ascon_memcpy: + # memcpy that preserves registers used by ascon + # dest in a2 + # src in a3 + # temporaries in a4, a5 + movi a4, 0 + j .LMcond +.LMloop: + l8ui a5, a3, 0 + s8i a5, a2, 0 + addi a2, a2, 1 + addi a3, a3, 1 + addi a4, a4, 1 +.LMcond: + bltu a4, ilen, .LMloop +.LMend: + ret + +.align 4 +.globl ascon_duplex +.type ascon_duplex,@function +ascon_duplex: + s32i a0, a1, S_lr2 + j .LDcond + +.LDloop: + l32i t0h, iptr, 0 + l32i t0l, iptr, 4 + call0 ascon_rev8_half + xor x0h, x0h, t0h + xor x0l, x0l, t0l + +.LDsqueeze: + beqz a13, .LDreset + + # ascon_rev8 + # inlined here to preserve registers + ssai 8 + srli a2, x0h, 16 + src a2, a2, x0h + src a2, a2, a2 + src a2, x0h, a2 + s32i a2, optr, 0 + + srli a2, x0l, 16 + src a2, a2, x0l + src a2, a2, a2 + src a2, x0l, a2 + s32i a2, optr, 4 + +.LDreset: + bgez mode, .LDpermute + mov x0h, t0h + mov x0l, t0l + +.LDpermute: + s32i optr, a1, S_optr_cur + s32i iptr, a1, S_iptr_cur + s32i ilen, a1, S_ilen_cur + movi a2, PB_START_ROUND + movi a3, PB_ROUNDS + call0 ascon_permute + l32i optr, a1, S_optr_cur + l32i iptr, a1, S_iptr_cur + l32i ilen, a1, S_ilen_cur + l32i mode, a1, S_mode_cur + + addi optr, optr, RATE + addi iptr, iptr, RATE + addi ilen, ilen, -RATE + +.LDcond: + bgeui ilen, RATE, .LDloop + +.LDend: + movi a2, 0 + s32i a2, a1, 0 + s32i a2, a1, 4 + + mov a2, a1 + mov a3, iptr + call0 ascon_memcpy + + movi a4, 0x80 + add a2, a1, ilen + l8ui a3, a2, 0 + xor a3, a3, a4 + s8i a3, a2, 0 + + l32i t0h, a1, 0 + l32i t0l, a1, 4 + call0 ascon_rev8_half + xor x0h, x0h, t0h + xor x0l, x0l, t0l + +.LDendsqueeze: + beqz mode, .LDendreset + + mov t0h, x0h + mov t0l, x0l + call0 ascon_rev8_half + s32i t0h, a1, 0 + s32i t0l, a1, 4 + + mov a2, optr + mov a3, a1 + call0 ascon_memcpy + +.LDendreset: + bgez mode, .LDreturn + + mov a2, a1 + mov a3, iptr + call0 ascon_memcpy + + l32i t0h, a1, 0 + l32i t0l, a1, 4 + call0 ascon_rev8_half + mov x0h, t0h + mov x0l, t0l + +.LDreturn: + add optr, optr, ilen + add iptr, iptr, ilen + l32i a0, a1, S_lr2 + ret + +.align 4 +.globl ascon_core +.type ascon_core,@function +ascon_core: + abi_entry 80, 4 + s32i a0, a1, S_lr + s32i a2, a1, S_optr + s32i a3, a1, S_iptr + s32i a4, a1, S_ilen + s32i a5, a1, S_iptr_cur + s32i a6, a1, S_ilen_cur + + # load key + l32i a2, a1, S_kptr_arg + l32i t0h, a2, 0 + ssai 8 + srli t0l, t0h, 16 + src t0l, t0l, t0h + src t0l, t0l, t0l + src t0h, t0h, t0l + s32i t0h, a1, (S_key + 16) + + l32i t0h, a2, 4 + l32i t0l, a2, 8 + l32i t1h, a2, 12 + l32i t1l, a2, 16 + call0 ascon_rev8 + s32i t0h, a1, (S_key + 0) + s32i t0l, a1, (S_key + 4) + s32i t1h, a1, (S_key + 8) + s32i t1l, a1, (S_key + 12) + mov x1h, t0h + mov x1l, t0l + mov x2h, t1h + mov x2l, t1l + + # load nonce + # a7 is not clobbered by ascon_rev8 + # a7 does not overlap x1, x2, t0, or t1 + # x4 overlaps t1, move unnecessary + mov a2, a7 + l32i t0h, a2, 0 + l32i t0l, a2, 4 + l32i t1h, a2, 8 + l32i t1l, a2, 12 + call0 ascon_rev8 + mov x3h, t0h + mov x3l, t0l + + # load IV + movi x0h, IVh + + # load K0.h + # this clobbers a7 + l32i x0l, a1, (S_key + 16) + + movi a2, PA_START_ROUND + movi a3, PA_ROUNDS + call0 ascon_permute_noload + + # xor key + # x4 overlaps t1, do in two steps + l32i t0h, a1, (S_key + 16) + xor x2l, x2l, t0h + l32i t0h, a1, (S_key + 0) + l32i t0l, a1, (S_key + 4) + xor x3h, x3h, t0h + xor x3l, x3l, t0l + l32i t0h, a1, (S_key + 8) + l32i t0l, a1, (S_key + 12) + xor x4h, x4h, t0h + xor x4l, x4l, t0l + + # save state + s32i x2h, a1, (S_state + 0) + s32i x2l, a1, (S_state + 4) + s32i x3h, a1, (S_state + 8) + s32i x3l, a1, (S_state + 12) + + l32i ilen, a1, S_ilen_cur + beqz ilen, .LCskipad + + l32i iptr, a1, S_iptr_cur + movi mode, 0 + s32i mode, a1, S_mode_cur + call0 ascon_duplex + + movi a2, PB_START_ROUND + movi a3, PB_ROUNDS + call0 ascon_permute + +.LCskipad: + movi a2, 1 + xor x4l, x4l, a2 + + l32i optr, a1, S_optr + l32i iptr, a1, S_iptr + l32i ilen, a1, S_ilen + l8ui mode, a1, S_mode_arg + sext mode, mode, 7 + s32i mode, a1, S_mode_cur + call0 ascon_duplex + s32i optr, a1, S_optr_cur + s32i iptr, a1, S_iptr_cur + + # restore state + l32i x2h, a1, (S_state + 0) + l32i x2l, a1, (S_state + 4) + l32i x3h, a1, (S_state + 8) + l32i x3l, a1, (S_state + 12) + + # xor key + # x4 overlaps t1, do in two steps + l32i t0h, a1, (S_key + 16) + xor x1h, x1h, t0h + l32i t0h, a1, (S_key + 0) + l32i t0l, a1, (S_key + 4) + xor x1l, x1l, t0h + xor x2h, x2h, t0l + l32i t0h, a1, (S_key + 8) + l32i t0l, a1, (S_key + 12) + xor x2l, x2l, t0h + xor x3h, x3h, t0l + + movi a2, PA_START_ROUND + movi a3, PA_ROUNDS + call0 ascon_permute_noload + + # xor key + # x4 overlaps t1, do in two steps + l32i t0h, a1, (S_key + 0) + l32i t0l, a1, (S_key + 4) + xor x3h, x3h, t0h + xor x3l, x3l, t0l + l32i t0h, a1, (S_key + 8) + l32i t0l, a1, (S_key + 12) + xor x4h, x4h, t0h + xor x4l, x4l, t0l + + l32i a2, a1, S_mode_cur + bgez a2, .LCencrypt +.LCdecrypt: + + # save x4 into x0 + # x0 is no longer needed + # x4 overlaps t1 + mov x0h, x4h + mov x0l, x4l + + l32i a2, a1, S_iptr_cur + l32i t0h, a2, 0 + l32i t0l, a2, 4 + l32i t1h, a2, 8 + l32i t1l, a2, 12 + call0 ascon_rev8 + + # check tag + # x4 is in x0 + xor a2, x3h, t0h + xor a3, x3l, t0l + xor a2, a2, a3 + xor a3, x0h, t1h + xor a2, a2, a3 + xor a3, x0l, t1l + xor a2, a2, a3 + + beqz a2, .LCzeroreturn + movi a2, -1 + j .LCreturn +.LCencrypt: + + # store tag + # x4 overlaps t1, move unnecessary + mov t0h, x3h + mov t0l, x3l + call0 ascon_rev8 + l32i a2, a1, S_optr_cur + s32i t0h, a2, 0 + s32i t0l, a2, 4 + s32i t1h, a2, 8 + s32i t1l, a2, 12 + +.LCzeroreturn: + movi a2, 0 +.LCreturn: + l32i a0, a1, S_lr + abi_return diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/asm_esp32/ascon.h b/ascon/Implementations/crypto_aead/ascon80pqv12/asm_esp32/ascon.h new file mode 100644 index 0000000..88f4da3 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/asm_esp32/ascon.h @@ -0,0 +1,7 @@ +#include "api.h" + +int ascon_core(unsigned char * outptr, + const unsigned char * inptr, unsigned int inlen, + const unsigned char * adptr, unsigned int adlen, + const unsigned char * nptr, const unsigned char * kptr, + unsigned char mode); diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/asm_esp32/decrypt.c b/ascon/Implementations/crypto_aead/ascon80pqv12/asm_esp32/decrypt.c new file mode 100644 index 0000000..4ee4528 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/asm_esp32/decrypt.c @@ -0,0 +1,17 @@ +#include "ascon.h" + +int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen, + unsigned char* nsec, const unsigned char* c, + unsigned long long clen, const unsigned char* ad, + unsigned long long adlen, const unsigned char* npub, + const unsigned char* k) { + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + + *mlen = clen - CRYPTO_ABYTES; + (void)nsec; + + return ascon_core(m, c, *mlen, ad, adlen, npub, k, -1); +} diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/asm_esp32/encrypt.c b/ascon/Implementations/crypto_aead/ascon80pqv12/asm_esp32/encrypt.c new file mode 100644 index 0000000..a6f95db --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/asm_esp32/encrypt.c @@ -0,0 +1,13 @@ +#include "ascon.h" + +int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen, + const unsigned char* m, unsigned long long mlen, + const unsigned char* ad, unsigned long long adlen, + const unsigned char* nsec, const unsigned char* npub, + const unsigned char* k) { + + *clen = mlen + CRYPTO_ABYTES; + (void)nsec; + + return ascon_core(c, m, mlen, ad, adlen, npub, k, 1); +} diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/asm_esp32/implementors b/ascon/Implementations/crypto_aead/ascon80pqv12/asm_esp32/implementors new file mode 100644 index 0000000..1b9a187 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/asm_esp32/implementors @@ -0,0 +1 @@ +Ferdinand Bachmann