Commit c1af5db0 by Enrico Pozzobon

Finalists

parent 9c6d9e4a

Too many changes to show.

To preserve performance only 831 of 831+ files are displayed.

......@@ -22,15 +22,19 @@
forceinline void ascon_loadkey(word_t* K0, word_t* K1, word_t* K2,
const uint8_t* k) {
KINIT(K0, K1, K2);
if (CRYPTO_KEYBYTES == 16) {
*K1 = XOR(*K1, LOAD(k, 8));
*K2 = XOR(*K2, LOAD(k + 8, 8));
}
if (CRYPTO_KEYBYTES == 20) {
*K0 = XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4)));
k += 4;
*K0 = XOR(*K0, KEYROT(WORD_T(0), LOADBYTES(k, 4)));
*K1 = XOR(*K1, LOADBYTES(k + 4, 8));
*K2 = XOR(*K2, LOADBYTES(k + 12, 8));
}
*K1 = XOR(*K1, LOAD(k, 8));
*K2 = XOR(*K2, LOAD(k + 8, 8));
}
forceinline void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) {
forceinline void ascon_aeadinit(state_t* s, const uint8_t* npub,
const uint8_t* k) {
/* load nonce */
word_t N0 = LOAD(npub, 8);
word_t N1 = LOAD(npub + 8, 8);
......@@ -39,9 +43,9 @@ forceinline void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) {
ascon_loadkey(&K0, &K1, &K2, k);
/* initialize */
PINIT(s);
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8)
if (CRYPTO_KEYBYTES == 16 && ASCON_AEAD_RATE == 8)
s->x0 = XOR(s->x0, ASCON_128_IV);
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16)
if (CRYPTO_KEYBYTES == 16 && ASCON_AEAD_RATE == 16)
s->x0 = XOR(s->x0, ASCON_128A_IV);
if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, ASCON_80PQ_IV);
if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, K0);
......@@ -58,23 +62,23 @@ forceinline void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) {
forceinline void ascon_adata(state_t* s, const uint8_t* ad, uint64_t adlen) {
const __m512i u64big = AVX512_SHUFFLE_U64BIG;
const int mask = (ASCON_RATE == 8) ? 0xff : 0xffff;
const int nr = (ASCON_RATE == 8) ? 6 : 8;
const int mask = (ASCON_AEAD_RATE == 8) ? 0xff : 0xffff;
const int nr = (ASCON_AEAD_RATE == 8) ? 6 : 8;
state_t r = *s, t;
if (adlen) {
/* full associated data blocks */
while (adlen >= ASCON_RATE) {
while (adlen >= ASCON_AEAD_RATE) {
t.z = _mm512_maskz_loadu_epi8(mask, ad);
t.z = _mm512_maskz_shuffle_epi8(mask, t.z, u64big);
r.z = _mm512_xor_epi64(r.z, t.z);
P(&r, nr);
ad += ASCON_RATE;
adlen -= ASCON_RATE;
ad += ASCON_AEAD_RATE;
adlen -= ASCON_AEAD_RATE;
}
*s = r;
/* final associated data block */
word_t* px = &s->x0;
if (ASCON_RATE == 16 && adlen >= 8) {
if (ASCON_AEAD_RATE == 16 && adlen >= 8) {
s->x0 = XOR(s->x0, LOAD(ad, 8));
px = &s->x1;
ad += 8;
......@@ -92,25 +96,25 @@ forceinline void ascon_adata(state_t* s, const uint8_t* ad, uint64_t adlen) {
forceinline void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m,
uint64_t mlen) {
const __m512i u64big = AVX512_SHUFFLE_U64BIG;
const int mask = (ASCON_RATE == 8) ? 0xff : 0xffff;
const int nr = (ASCON_RATE == 8) ? 6 : 8;
const int mask = (ASCON_AEAD_RATE == 8) ? 0xff : 0xffff;
const int nr = (ASCON_AEAD_RATE == 8) ? 6 : 8;
state_t r = *s, t;
/* full plaintext blocks */
while (mlen >= ASCON_RATE) {
while (mlen >= ASCON_AEAD_RATE) {
t.z = _mm512_maskz_loadu_epi8(mask, m);
t.z = _mm512_maskz_shuffle_epi8(mask, t.z, u64big);
r.z = _mm512_xor_epi64(r.z, t.z);
t.z = _mm512_maskz_shuffle_epi8(mask, r.z, u64big);
_mm512_mask_storeu_epi8(c, mask, t.z);
P(&r, nr);
m += ASCON_RATE;
c += ASCON_RATE;
mlen -= ASCON_RATE;
m += ASCON_AEAD_RATE;
c += ASCON_AEAD_RATE;
mlen -= ASCON_AEAD_RATE;
}
*s = r;
/* final plaintext block */
word_t* px = &s->x0;
if (ASCON_RATE == 16 && mlen >= 8) {
if (ASCON_AEAD_RATE == 16 && mlen >= 8) {
s->x0 = XOR(s->x0, LOAD(m, 8));
STORE(c, s->x0, 8);
px = &s->x1;
......@@ -129,11 +133,11 @@ forceinline void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m,
forceinline void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c,
uint64_t clen) {
const __m512i u64big = AVX512_SHUFFLE_U64BIG;
const int mask = (ASCON_RATE == 8) ? 0xff : 0xffff;
const int nr = (ASCON_RATE == 8) ? 6 : 8;
const int mask = (ASCON_AEAD_RATE == 8) ? 0xff : 0xffff;
const int nr = (ASCON_AEAD_RATE == 8) ? 6 : 8;
state_t r = *s, t, u;
/* full ciphertext blocks */
while (clen >= ASCON_RATE) {
while (clen >= ASCON_AEAD_RATE) {
t.z = _mm512_maskz_loadu_epi8(mask, c);
t.z = _mm512_maskz_shuffle_epi8(mask, t.z, u64big);
r.z = _mm512_xor_epi64(r.z, t.z);
......@@ -141,14 +145,14 @@ forceinline void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c,
r.z = _mm512_mask_blend_epi8(mask, r.z, t.z);
_mm512_mask_storeu_epi8(m, mask, u.z);
P(&r, nr);
m += ASCON_RATE;
c += ASCON_RATE;
clen -= ASCON_RATE;
m += ASCON_AEAD_RATE;
c += ASCON_AEAD_RATE;
clen -= ASCON_AEAD_RATE;
}
*s = r;
/* final ciphertext block */
word_t* px = &s->x0;
if (ASCON_RATE == 16 && clen >= 8) {
if (ASCON_AEAD_RATE == 16 && clen >= 8) {
word_t cx = LOAD(c, 8);
s->x0 = XOR(s->x0, cx);
STORE(m, s->x0, 8);
......@@ -174,11 +178,11 @@ forceinline void ascon_final(state_t* s, const uint8_t* k) {
word_t K0, K1, K2;
ascon_loadkey(&K0, &K1, &K2, k);
/* finalize */
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) {
if (CRYPTO_KEYBYTES == 16 && ASCON_AEAD_RATE == 8) {
s->x1 = XOR(s->x1, K1);
s->x2 = XOR(s->x2, K2);
}
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) {
if (CRYPTO_KEYBYTES == 16 && ASCON_AEAD_RATE == 16) {
s->x2 = XOR(s->x2, K1);
s->x3 = XOR(s->x3, K2);
}
......@@ -202,7 +206,7 @@ int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen,
(void)nsec;
*clen = mlen + CRYPTO_ABYTES;
/* perform ascon computation */
ascon_init(&s, npub, k);
ascon_aeadinit(&s, npub, k);
ascon_adata(&s, ad, adlen);
ascon_encrypt(&s, c, m, mlen);
ascon_final(&s, k);
......@@ -222,7 +226,7 @@ int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen,
if (clen < CRYPTO_ABYTES) return -1;
*mlen = clen = clen - CRYPTO_ABYTES;
/* perform ascon computation */
ascon_init(&s, npub, k);
ascon_aeadinit(&s, npub, k);
ascon_adata(&s, ad, adlen);
ascon_decrypt(&s, m, c, clen);
ascon_final(&s, k);
......
#define CRYPTO_VERSION "1.2.4"
#define CRYPTO_VERSION "1.2.5"
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#define ASCON_RATE 16
#define ASCON_AEAD_RATE 16
......@@ -13,7 +13,7 @@ typedef union {
};
} state_t;
void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k);
void ascon_aeadinit(state_t* s, const uint8_t* npub, const uint8_t* k);
void ascon_adata(state_t* s, const uint8_t* ad, uint64_t adlen);
void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen);
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen);
......
......@@ -15,18 +15,29 @@
#define ASCON_128_RATE 8
#define ASCON_128A_RATE 16
#define ASCON_HASH_RATE 8
#define ASCON_128_PA_ROUNDS 12
#define ASCON_128_PB_ROUNDS 6
#define ASCON_128A_PA_ROUNDS 12
#define ASCON_128A_PB_ROUNDS 8
#define ASCON_HASH_PA_ROUNDS 12
#define ASCON_HASH_PB_ROUNDS 12
#define ASCON_HASHA_PA_ROUNDS 12
#define ASCON_HASHA_PB_ROUNDS 8
#define ASCON_HASH_BYTES 32
#define ASCON_128_IV WORD_T(0x80400c0600000000)
#define ASCON_128A_IV WORD_T(0x80800c0800000000)
#define ASCON_80PQ_IV WORD_T(0xa0400c0600000000)
#define ASCON_HASH_IV WORD_T(0x00400c0000000100)
#define ASCON_XOF_IV WORD_T(0x00400c0000000000)
#define ASCON_128_IV WORD_T(0x80400c0600000000ull)
#define ASCON_128A_IV WORD_T(0x80800c0800000000ull)
#define ASCON_80PQ_IV WORD_T(0xa0400c0600000000ull)
#define ASCON_HASH_IV WORD_T(0x00400c0000000100ull)
#define ASCON_HASHA_IV WORD_T(0x00400c0400000100ull)
#define ASCON_XOF_IV WORD_T(0x00400c0000000000ull)
#define ASCON_XOFA_IV WORD_T(0x00400c0400000000ull)
#define ASCON_HASH_IV0 WORD_T(0xee9398aadb67f03dull)
#define ASCON_HASH_IV1 WORD_T(0x8bb21831c60f1002ull)
......@@ -34,12 +45,24 @@
#define ASCON_HASH_IV3 WORD_T(0x43189921b8f8e3e8ull)
#define ASCON_HASH_IV4 WORD_T(0x348fa5c9d525e140ull)
#define ASCON_HASHA_IV0 WORD_T(0x01470194fc6528a6ull)
#define ASCON_HASHA_IV1 WORD_T(0x738ec38ac0adffa7ull)
#define ASCON_HASHA_IV2 WORD_T(0x2ec8e3296c76384cull)
#define ASCON_HASHA_IV3 WORD_T(0xd6f6a54d7f52377dull)
#define ASCON_HASHA_IV4 WORD_T(0xa13c42a223be8d87ull)
#define ASCON_XOF_IV0 WORD_T(0xb57e273b814cd416ull)
#define ASCON_XOF_IV1 WORD_T(0x2b51042562ae2420ull)
#define ASCON_XOF_IV2 WORD_T(0x66a3a7768ddf2218ull)
#define ASCON_XOF_IV3 WORD_T(0x5aad0a7a8153650cull)
#define ASCON_XOF_IV4 WORD_T(0x4f3e0e32539493b6ull)
#define ASCON_XOFA_IV0 WORD_T(0x44906568b77b9832ull)
#define ASCON_XOFA_IV1 WORD_T(0xcd8d6cae53455532ull)
#define ASCON_XOFA_IV2 WORD_T(0xf7b5212756422129ull)
#define ASCON_XOFA_IV3 WORD_T(0x246885e1de0d225bull)
#define ASCON_XOFA_IV4 WORD_T(0xa8cb5ce33449973full)
#define START(n) ((3 + (n)) << 4 | (12 - (n)))
#define RC(c) WORD_T(c)
......
#include "api.h"
#include "ascon.h"
#include "crypto_aead.h"
#include "permutations.h"
#include "printstate.h"
__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2,
const uint8_t* k) {
#if !ASCON_INLINE_MODE
#undef forceinline
#define forceinline
#endif
forceinline void ascon_loadkey(word_t* K0, word_t* K1, word_t* K2,
const uint8_t* k) {
KINIT(K0, K1, K2);
if (CRYPTO_KEYBYTES == 16) {
*K1 = XOR(*K1, LOAD(k, 8));
*K2 = XOR(*K2, LOAD(k + 8, 8));
}
if (CRYPTO_KEYBYTES == 20) {
*K0 = XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4)));
k += 4;
*K0 = XOR(*K0, KEYROT(WORD_T(0), LOADBYTES(k, 4)));
*K1 = XOR(*K1, LOADBYTES(k + 4, 8));
*K2 = XOR(*K2, LOADBYTES(k + 12, 8));
}
*K1 = XOR(*K1, LOAD64(k));
*K2 = XOR(*K2, LOAD64(k + 8));
}
__forceinline void init(state_t* s, const uint8_t* npub, word_t K0, word_t K1,
word_t K2) {
word_t N0, N1;
forceinline void ascon_aeadinit(state_t* s, const uint8_t* npub,
const uint8_t* k) {
/* load nonce */
N0 = LOAD64(npub);
N1 = LOAD64(npub + 8);
/* initialization */
word_t N0 = LOAD(npub, 8);
word_t N1 = LOAD(npub + 8, 8);
/* load key */
word_t K0, K1, K2;
ascon_loadkey(&K0, &K1, &K2, k);
/* initialize */
PINIT(s);
s->x0 = XOR(s->x0, IV);
if (CRYPTO_KEYBYTES == 16 && ASCON_AEAD_RATE == 8)
s->x0 = XOR(s->x0, ASCON_128_IV);
if (CRYPTO_KEYBYTES == 16 && ASCON_AEAD_RATE == 16)
s->x0 = XOR(s->x0, ASCON_128A_IV);
if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, ASCON_80PQ_IV);
if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, K0);
s->x1 = XOR(s->x1, K1);
s->x2 = XOR(s->x2, K2);
s->x3 = XOR(s->x3, N0);
s->x4 = XOR(s->x4, N1);
P12(s);
P(s, 12);
if (CRYPTO_KEYBYTES == 20) s->x2 = XOR(s->x2, K0);
s->x3 = XOR(s->x3, K1);
s->x4 = XOR(s->x4, K2);
printstate("initialization", s);
}
__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) {
word_t* restrict px;
/* process associated data */
forceinline void ascon_adata(state_t* s, const uint8_t* ad, uint64_t adlen) {
const int nr = (ASCON_AEAD_RATE == 8) ? 6 : 8;
if (adlen) {
while (adlen >= ASCON_RATE) {
s->x0 = XOR(s->x0, LOAD64(ad));
if (ASCON_RATE == 16) s->x1 = XOR(s->x1, LOAD64(ad + 8));
PB(s);
ad += ASCON_RATE;
adlen -= ASCON_RATE;
/* full associated data blocks */
while (adlen >= ASCON_AEAD_RATE) {
s->x0 = XOR(s->x0, LOAD(ad, 8));
if (ASCON_AEAD_RATE == 16) s->x1 = XOR(s->x1, LOAD(ad + 8, 8));
P(s, nr);
ad += ASCON_AEAD_RATE;
adlen -= ASCON_AEAD_RATE;
}
/* final associated data block */
px = &s->x0;
if (ASCON_RATE == 16 && adlen >= 8) {
s->x0 = XOR(s->x0, LOAD64(ad));
word_t* px = &s->x0;
if (ASCON_AEAD_RATE == 16 && adlen >= 8) {
s->x0 = XOR(s->x0, LOAD(ad, 8));
px = &s->x1;
ad += 8;
adlen -= 8;
}
if (adlen) *px = XOR(*px, LOAD(ad, adlen));
*px = XOR(*px, PAD(adlen));
PB(s);
if (adlen) *px = XOR(*px, LOAD(ad, adlen));
P(s, nr);
}
/* domain separation */
s->x4 = XOR(s->x4, WORD_T(1));
printstate("process associated data", s);
}
__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m,
uint64_t mlen) {
word_t* restrict px;
/* process plaintext */
while (mlen >= ASCON_RATE) {
s->x0 = XOR(s->x0, LOAD64(m));
STORE64(c, s->x0);
if (ASCON_RATE == 16) {
s->x1 = XOR(s->x1, LOAD64(m + 8));
STORE64(c + 8, s->x1);
forceinline void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m,
uint64_t mlen) {
const int nr = (ASCON_AEAD_RATE == 8) ? 6 : 8;
/* full plaintext blocks */
while (mlen >= ASCON_AEAD_RATE) {
s->x0 = XOR(s->x0, LOAD(m, 8));
STORE(c, s->x0, 8);
if (ASCON_AEAD_RATE == 16) {
s->x1 = XOR(s->x1, LOAD(m + 8, 8));
STORE(c + 8, s->x1, 8);
}
PB(s);
m += ASCON_RATE;
c += ASCON_RATE;
mlen -= ASCON_RATE;
P(s, nr);
m += ASCON_AEAD_RATE;
c += ASCON_AEAD_RATE;
mlen -= ASCON_AEAD_RATE;
}
/* final plaintext block */
px = &s->x0;
if (ASCON_RATE == 16 && mlen >= 8) {
s->x0 = XOR(s->x0, LOAD64(m));
STORE64(c, s->x0);
word_t* px = &s->x0;
if (ASCON_AEAD_RATE == 16 && mlen >= 8) {
s->x0 = XOR(s->x0, LOAD(m, 8));
STORE(c, s->x0, 8);
px = &s->x1;
m += 8;
c += 8;
mlen -= 8;
}
*px = XOR(*px, PAD(mlen));
if (mlen) {
*px = XOR(*px, LOAD(m, mlen));
STORE(c, *px, mlen);
}
*px = XOR(*px, PAD(mlen));
printstate("process plaintext", s);
}
__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c,
uint64_t clen) {
word_t* restrict px;
word_t cx;
/* process ciphertext */
while (clen >= ASCON_RATE) {
cx = LOAD64(c);
forceinline void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c,
uint64_t clen) {
const int nr = (ASCON_AEAD_RATE == 8) ? 6 : 8;
/* full ciphertext blocks */
while (clen >= ASCON_AEAD_RATE) {
word_t cx = LOAD(c, 8);
s->x0 = XOR(s->x0, cx);
STORE64(m, s->x0);
STORE(m, s->x0, 8);
s->x0 = cx;
if (ASCON_RATE == 16) {
cx = LOAD64(c + 8);
if (ASCON_AEAD_RATE == 16) {
cx = LOAD(c + 8, 8);
s->x1 = XOR(s->x1, cx);
STORE64(m + 8, s->x1);
STORE(m + 8, s->x1, 8);
s->x1 = cx;
}
PB(s);
m += ASCON_RATE;
c += ASCON_RATE;
clen -= ASCON_RATE;
P(s, nr);
m += ASCON_AEAD_RATE;
c += ASCON_AEAD_RATE;
clen -= ASCON_AEAD_RATE;
}
/* final ciphertext block */