Commit c1af5db0 by Enrico Pozzobon

Finalists

parent 9c6d9e4a

Too many changes to show.

To preserve performance only 1000 of 1000+ files are displayed.

...@@ -22,15 +22,19 @@ ...@@ -22,15 +22,19 @@
forceinline void ascon_loadkey(word_t* K0, word_t* K1, word_t* K2, forceinline void ascon_loadkey(word_t* K0, word_t* K1, word_t* K2,
const uint8_t* k) { const uint8_t* k) {
KINIT(K0, K1, K2); KINIT(K0, K1, K2);
if (CRYPTO_KEYBYTES == 16) {
*K1 = XOR(*K1, LOAD(k, 8));
*K2 = XOR(*K2, LOAD(k + 8, 8));
}
if (CRYPTO_KEYBYTES == 20) { if (CRYPTO_KEYBYTES == 20) {
*K0 = XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4))); *K0 = XOR(*K0, KEYROT(WORD_T(0), LOADBYTES(k, 4)));
k += 4; *K1 = XOR(*K1, LOADBYTES(k + 4, 8));
*K2 = XOR(*K2, LOADBYTES(k + 12, 8));
} }
*K1 = XOR(*K1, LOAD(k, 8));
*K2 = XOR(*K2, LOAD(k + 8, 8));
} }
forceinline void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) { forceinline void ascon_aeadinit(state_t* s, const uint8_t* npub,
const uint8_t* k) {
/* load nonce */ /* load nonce */
word_t N0 = LOAD(npub, 8); word_t N0 = LOAD(npub, 8);
word_t N1 = LOAD(npub + 8, 8); word_t N1 = LOAD(npub + 8, 8);
...@@ -39,9 +43,9 @@ forceinline void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) { ...@@ -39,9 +43,9 @@ forceinline void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) {
ascon_loadkey(&K0, &K1, &K2, k); ascon_loadkey(&K0, &K1, &K2, k);
/* initialize */ /* initialize */
PINIT(s); PINIT(s);
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) if (CRYPTO_KEYBYTES == 16 && ASCON_AEAD_RATE == 8)
s->x0 = XOR(s->x0, ASCON_128_IV); s->x0 = XOR(s->x0, ASCON_128_IV);
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) if (CRYPTO_KEYBYTES == 16 && ASCON_AEAD_RATE == 16)
s->x0 = XOR(s->x0, ASCON_128A_IV); s->x0 = XOR(s->x0, ASCON_128A_IV);
if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, ASCON_80PQ_IV); if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, ASCON_80PQ_IV);
if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, K0); if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, K0);
...@@ -58,23 +62,23 @@ forceinline void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) { ...@@ -58,23 +62,23 @@ forceinline void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) {
forceinline void ascon_adata(state_t* s, const uint8_t* ad, uint64_t adlen) { forceinline void ascon_adata(state_t* s, const uint8_t* ad, uint64_t adlen) {
const __m512i u64big = AVX512_SHUFFLE_U64BIG; const __m512i u64big = AVX512_SHUFFLE_U64BIG;
const int mask = (ASCON_RATE == 8) ? 0xff : 0xffff; const int mask = (ASCON_AEAD_RATE == 8) ? 0xff : 0xffff;
const int nr = (ASCON_RATE == 8) ? 6 : 8; const int nr = (ASCON_AEAD_RATE == 8) ? 6 : 8;
state_t r = *s, t; state_t r = *s, t;
if (adlen) { if (adlen) {
/* full associated data blocks */ /* full associated data blocks */
while (adlen >= ASCON_RATE) { while (adlen >= ASCON_AEAD_RATE) {
t.z = _mm512_maskz_loadu_epi8(mask, ad); t.z = _mm512_maskz_loadu_epi8(mask, ad);
t.z = _mm512_maskz_shuffle_epi8(mask, t.z, u64big); t.z = _mm512_maskz_shuffle_epi8(mask, t.z, u64big);
r.z = _mm512_xor_epi64(r.z, t.z); r.z = _mm512_xor_epi64(r.z, t.z);
P(&r, nr); P(&r, nr);
ad += ASCON_RATE; ad += ASCON_AEAD_RATE;
adlen -= ASCON_RATE; adlen -= ASCON_AEAD_RATE;
} }
*s = r; *s = r;
/* final associated data block */ /* final associated data block */
word_t* px = &s->x0; word_t* px = &s->x0;
if (ASCON_RATE == 16 && adlen >= 8) { if (ASCON_AEAD_RATE == 16 && adlen >= 8) {
s->x0 = XOR(s->x0, LOAD(ad, 8)); s->x0 = XOR(s->x0, LOAD(ad, 8));
px = &s->x1; px = &s->x1;
ad += 8; ad += 8;
...@@ -92,25 +96,25 @@ forceinline void ascon_adata(state_t* s, const uint8_t* ad, uint64_t adlen) { ...@@ -92,25 +96,25 @@ forceinline void ascon_adata(state_t* s, const uint8_t* ad, uint64_t adlen) {
forceinline void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, forceinline void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m,
uint64_t mlen) { uint64_t mlen) {
const __m512i u64big = AVX512_SHUFFLE_U64BIG; const __m512i u64big = AVX512_SHUFFLE_U64BIG;
const int mask = (ASCON_RATE == 8) ? 0xff : 0xffff; const int mask = (ASCON_AEAD_RATE == 8) ? 0xff : 0xffff;
const int nr = (ASCON_RATE == 8) ? 6 : 8; const int nr = (ASCON_AEAD_RATE == 8) ? 6 : 8;
state_t r = *s, t; state_t r = *s, t;
/* full plaintext blocks */ /* full plaintext blocks */
while (mlen >= ASCON_RATE) { while (mlen >= ASCON_AEAD_RATE) {
t.z = _mm512_maskz_loadu_epi8(mask, m); t.z = _mm512_maskz_loadu_epi8(mask, m);
t.z = _mm512_maskz_shuffle_epi8(mask, t.z, u64big); t.z = _mm512_maskz_shuffle_epi8(mask, t.z, u64big);
r.z = _mm512_xor_epi64(r.z, t.z); r.z = _mm512_xor_epi64(r.z, t.z);
t.z = _mm512_maskz_shuffle_epi8(mask, r.z, u64big); t.z = _mm512_maskz_shuffle_epi8(mask, r.z, u64big);
_mm512_mask_storeu_epi8(c, mask, t.z); _mm512_mask_storeu_epi8(c, mask, t.z);
P(&r, nr); P(&r, nr);
m += ASCON_RATE; m += ASCON_AEAD_RATE;
c += ASCON_RATE; c += ASCON_AEAD_RATE;
mlen -= ASCON_RATE; mlen -= ASCON_AEAD_RATE;
} }
*s = r; *s = r;
/* final plaintext block */ /* final plaintext block */
word_t* px = &s->x0; word_t* px = &s->x0;
if (ASCON_RATE == 16 && mlen >= 8) { if (ASCON_AEAD_RATE == 16 && mlen >= 8) {
s->x0 = XOR(s->x0, LOAD(m, 8)); s->x0 = XOR(s->x0, LOAD(m, 8));
STORE(c, s->x0, 8); STORE(c, s->x0, 8);
px = &s->x1; px = &s->x1;
...@@ -129,11 +133,11 @@ forceinline void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, ...@@ -129,11 +133,11 @@ forceinline void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m,
forceinline void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, forceinline void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c,
uint64_t clen) { uint64_t clen) {
const __m512i u64big = AVX512_SHUFFLE_U64BIG; const __m512i u64big = AVX512_SHUFFLE_U64BIG;
const int mask = (ASCON_RATE == 8) ? 0xff : 0xffff; const int mask = (ASCON_AEAD_RATE == 8) ? 0xff : 0xffff;
const int nr = (ASCON_RATE == 8) ? 6 : 8; const int nr = (ASCON_AEAD_RATE == 8) ? 6 : 8;
state_t r = *s, t, u; state_t r = *s, t, u;
/* full ciphertext blocks */ /* full ciphertext blocks */
while (clen >= ASCON_RATE) { while (clen >= ASCON_AEAD_RATE) {
t.z = _mm512_maskz_loadu_epi8(mask, c); t.z = _mm512_maskz_loadu_epi8(mask, c);
t.z = _mm512_maskz_shuffle_epi8(mask, t.z, u64big); t.z = _mm512_maskz_shuffle_epi8(mask, t.z, u64big);
r.z = _mm512_xor_epi64(r.z, t.z); r.z = _mm512_xor_epi64(r.z, t.z);
...@@ -141,14 +145,14 @@ forceinline void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, ...@@ -141,14 +145,14 @@ forceinline void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c,
r.z = _mm512_mask_blend_epi8(mask, r.z, t.z); r.z = _mm512_mask_blend_epi8(mask, r.z, t.z);
_mm512_mask_storeu_epi8(m, mask, u.z); _mm512_mask_storeu_epi8(m, mask, u.z);
P(&r, nr); P(&r, nr);
m += ASCON_RATE; m += ASCON_AEAD_RATE;
c += ASCON_RATE; c += ASCON_AEAD_RATE;
clen -= ASCON_RATE; clen -= ASCON_AEAD_RATE;
} }
*s = r; *s = r;
/* final ciphertext block */ /* final ciphertext block */
word_t* px = &s->x0; word_t* px = &s->x0;
if (ASCON_RATE == 16 && clen >= 8) { if (ASCON_AEAD_RATE == 16 && clen >= 8) {
word_t cx = LOAD(c, 8); word_t cx = LOAD(c, 8);
s->x0 = XOR(s->x0, cx); s->x0 = XOR(s->x0, cx);
STORE(m, s->x0, 8); STORE(m, s->x0, 8);
...@@ -174,11 +178,11 @@ forceinline void ascon_final(state_t* s, const uint8_t* k) { ...@@ -174,11 +178,11 @@ forceinline void ascon_final(state_t* s, const uint8_t* k) {
word_t K0, K1, K2; word_t K0, K1, K2;
ascon_loadkey(&K0, &K1, &K2, k); ascon_loadkey(&K0, &K1, &K2, k);
/* finalize */ /* finalize */
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) { if (CRYPTO_KEYBYTES == 16 && ASCON_AEAD_RATE == 8) {
s->x1 = XOR(s->x1, K1); s->x1 = XOR(s->x1, K1);
s->x2 = XOR(s->x2, K2); s->x2 = XOR(s->x2, K2);
} }
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) { if (CRYPTO_KEYBYTES == 16 && ASCON_AEAD_RATE == 16) {
s->x2 = XOR(s->x2, K1); s->x2 = XOR(s->x2, K1);
s->x3 = XOR(s->x3, K2); s->x3 = XOR(s->x3, K2);
} }
...@@ -202,7 +206,7 @@ int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen, ...@@ -202,7 +206,7 @@ int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen,
(void)nsec; (void)nsec;
*clen = mlen + CRYPTO_ABYTES; *clen = mlen + CRYPTO_ABYTES;
/* perform ascon computation */ /* perform ascon computation */
ascon_init(&s, npub, k); ascon_aeadinit(&s, npub, k);
ascon_adata(&s, ad, adlen); ascon_adata(&s, ad, adlen);
ascon_encrypt(&s, c, m, mlen); ascon_encrypt(&s, c, m, mlen);
ascon_final(&s, k); ascon_final(&s, k);
...@@ -222,7 +226,7 @@ int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen, ...@@ -222,7 +226,7 @@ int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen,
if (clen < CRYPTO_ABYTES) return -1; if (clen < CRYPTO_ABYTES) return -1;
*mlen = clen = clen - CRYPTO_ABYTES; *mlen = clen = clen - CRYPTO_ABYTES;
/* perform ascon computation */ /* perform ascon computation */
ascon_init(&s, npub, k); ascon_aeadinit(&s, npub, k);
ascon_adata(&s, ad, adlen); ascon_adata(&s, ad, adlen);
ascon_decrypt(&s, m, c, clen); ascon_decrypt(&s, m, c, clen);
ascon_final(&s, k); ascon_final(&s, k);
......
#define CRYPTO_VERSION "1.2.4" #define CRYPTO_VERSION "1.2.5"
#define CRYPTO_KEYBYTES 16 #define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0 #define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16 #define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16 #define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1 #define CRYPTO_NOOVERLAP 1
#define ASCON_RATE 16 #define ASCON_AEAD_RATE 16
...@@ -13,7 +13,7 @@ typedef union { ...@@ -13,7 +13,7 @@ typedef union {
}; };
} state_t; } state_t;
void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k); void ascon_aeadinit(state_t* s, const uint8_t* npub, const uint8_t* k);
void ascon_adata(state_t* s, const uint8_t* ad, uint64_t adlen); void ascon_adata(state_t* s, const uint8_t* ad, uint64_t adlen);
void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen);
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen);
......
...@@ -15,18 +15,29 @@ ...@@ -15,18 +15,29 @@
#define ASCON_128_RATE 8 #define ASCON_128_RATE 8
#define ASCON_128A_RATE 16 #define ASCON_128A_RATE 16
#define ASCON_HASH_RATE 8
#define ASCON_128_PA_ROUNDS 12 #define ASCON_128_PA_ROUNDS 12
#define ASCON_128_PB_ROUNDS 6 #define ASCON_128_PB_ROUNDS 6
#define ASCON_128A_PA_ROUNDS 12
#define ASCON_128A_PB_ROUNDS 8 #define ASCON_128A_PB_ROUNDS 8
#define ASCON_HASH_PA_ROUNDS 12
#define ASCON_HASH_PB_ROUNDS 12
#define ASCON_HASHA_PA_ROUNDS 12
#define ASCON_HASHA_PB_ROUNDS 8
#define ASCON_HASH_BYTES 32 #define ASCON_HASH_BYTES 32
#define ASCON_128_IV WORD_T(0x80400c0600000000) #define ASCON_128_IV WORD_T(0x80400c0600000000ull)
#define ASCON_128A_IV WORD_T(0x80800c0800000000) #define ASCON_128A_IV WORD_T(0x80800c0800000000ull)
#define ASCON_80PQ_IV WORD_T(0xa0400c0600000000) #define ASCON_80PQ_IV WORD_T(0xa0400c0600000000ull)
#define ASCON_HASH_IV WORD_T(0x00400c0000000100) #define ASCON_HASH_IV WORD_T(0x00400c0000000100ull)
#define ASCON_XOF_IV WORD_T(0x00400c0000000000) #define ASCON_HASHA_IV WORD_T(0x00400c0400000100ull)
#define ASCON_XOF_IV WORD_T(0x00400c0000000000ull)
#define ASCON_XOFA_IV WORD_T(0x00400c0400000000ull)
#define ASCON_HASH_IV0 WORD_T(0xee9398aadb67f03dull) #define ASCON_HASH_IV0 WORD_T(0xee9398aadb67f03dull)
#define ASCON_HASH_IV1 WORD_T(0x8bb21831c60f1002ull) #define ASCON_HASH_IV1 WORD_T(0x8bb21831c60f1002ull)
...@@ -34,12 +45,24 @@ ...@@ -34,12 +45,24 @@
#define ASCON_HASH_IV3 WORD_T(0x43189921b8f8e3e8ull) #define ASCON_HASH_IV3 WORD_T(0x43189921b8f8e3e8ull)
#define ASCON_HASH_IV4 WORD_T(0x348fa5c9d525e140ull) #define ASCON_HASH_IV4 WORD_T(0x348fa5c9d525e140ull)
#define ASCON_HASHA_IV0 WORD_T(0x01470194fc6528a6ull)
#define ASCON_HASHA_IV1 WORD_T(0x738ec38ac0adffa7ull)
#define ASCON_HASHA_IV2 WORD_T(0x2ec8e3296c76384cull)
#define ASCON_HASHA_IV3 WORD_T(0xd6f6a54d7f52377dull)
#define ASCON_HASHA_IV4 WORD_T(0xa13c42a223be8d87ull)
#define ASCON_XOF_IV0 WORD_T(0xb57e273b814cd416ull) #define ASCON_XOF_IV0 WORD_T(0xb57e273b814cd416ull)
#define ASCON_XOF_IV1 WORD_T(0x2b51042562ae2420ull) #define ASCON_XOF_IV1 WORD_T(0x2b51042562ae2420ull)
#define ASCON_XOF_IV2 WORD_T(0x66a3a7768ddf2218ull) #define ASCON_XOF_IV2 WORD_T(0x66a3a7768ddf2218ull)
#define ASCON_XOF_IV3 WORD_T(0x5aad0a7a8153650cull) #define ASCON_XOF_IV3 WORD_T(0x5aad0a7a8153650cull)
#define ASCON_XOF_IV4 WORD_T(0x4f3e0e32539493b6ull) #define ASCON_XOF_IV4 WORD_T(0x4f3e0e32539493b6ull)
#define ASCON_XOFA_IV0 WORD_T(0x44906568b77b9832ull)
#define ASCON_XOFA_IV1 WORD_T(0xcd8d6cae53455532ull)
#define ASCON_XOFA_IV2 WORD_T(0xf7b5212756422129ull)
#define ASCON_XOFA_IV3 WORD_T(0x246885e1de0d225bull)
#define ASCON_XOFA_IV4 WORD_T(0xa8cb5ce33449973full)
#define START(n) ((3 + (n)) << 4 | (12 - (n))) #define START(n) ((3 + (n)) << 4 | (12 - (n)))
#define RC(c) WORD_T(c) #define RC(c) WORD_T(c)
......
#define CRYPTO_VERSION "1.2.5"
#define CRYPTO_KEYBYTES 16 #define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0 #define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16 #define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16 #define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1 #define CRYPTO_NOOVERLAP 1
#define ASCON_RATE 16 #define ASCON_AEAD_RATE 16
...@@ -3,15 +3,14 @@ ...@@ -3,15 +3,14 @@
#include <stdint.h> #include <stdint.h>
#include "config.h"
#include "word.h" #include "word.h"
typedef struct { typedef struct {
word_t x0, x1, x2, x3, x4; word_t x0, x1, x2, x3, x4;
} state_t; } state_t;
void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k); void ascon_aeadinit(state_t* s, const uint8_t* npub, const uint8_t* k);
void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen); void ascon_adata(state_t* s, const uint8_t* ad, uint64_t adlen);
void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen);
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen);
void ascon_final(state_t* s, const uint8_t* k); void ascon_final(state_t* s, const uint8_t* k);
......
#ifndef CONFIG_H_ #ifndef CONFIG_H_
#define CONFIG_H_ #define CONFIG_H_
/* inline the Ascon mode */ /* inline the ascon mode */
#ifndef ASCON_INLINE_MODE #ifndef ASCON_INLINE_MODE
#define ASCON_INLINE_MODE 1 #define ASCON_INLINE_MODE 1
#endif #endif
/* inline the Ascon permutations */ /* inline all permutations */
#ifndef ASCON_INLINE_PERM #ifndef ASCON_INLINE_PERM
#define ASCON_INLINE_PERM 0 #define ASCON_INLINE_PERM 0
#endif #endif
/* single function for all permutations */ /* unroll permutation loops */
#ifndef ASCON_SINGLE_PERM
#define ASCON_SINGLE_PERM 0
#endif
/* unroll the permutation loops */
#ifndef ASCON_UNROLL_LOOPS #ifndef ASCON_UNROLL_LOOPS
#define ASCON_UNROLL_LOOPS 0 #define ASCON_UNROLL_LOOPS 1
#endif
/* make sure __forceinline is supported */
#ifndef __forceinline
#define __forceinline inline __attribute__((always_inline))
#endif #endif
#endif /* CONFIG_H_ */ #endif /* CONFIG_H_ */
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
/* macros for big endian machines */ /* macros for big endian machines */
#ifndef NDEBUG #ifdef PRAGMA_ENDIAN
#pragma message("Using macros for big endian machines") #pragma message("Using macros for big endian machines")
#endif #endif
#define U64BIG(x) (x) #define U64BIG(x) (x)
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
/* macros for little endian machines */ /* macros for little endian machines */
#ifndef NDEBUG #ifdef PRAGMA_ENDIAN
#pragma message("Using macros for little endian machines") #pragma message("Using macros for little endian machines")
#endif #endif
#define U64BIG(x) \ #define U64BIG(x) \
......
...@@ -3,7 +3,9 @@ ...@@ -3,7 +3,9 @@
#include <stdint.h> #include <stdint.h>
__forceinline uint32_t deinterleave_uint32(uint32_t x) { #include "forceinline.h"
forceinline uint32_t deinterleave_uint32(uint32_t x) {
uint32_t t; uint32_t t;
t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1); t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1);
t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2); t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2);
...@@ -12,7 +14,7 @@ __forceinline uint32_t deinterleave_uint32(uint32_t x) { ...@@ -12,7 +14,7 @@ __forceinline uint32_t deinterleave_uint32(uint32_t x) {
return x; return x;
} }
__forceinline uint32_t interleave_uint32(uint32_t x) { forceinline uint32_t interleave_uint32(uint32_t x) {
uint32_t t; uint32_t t;
t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8); t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8);
t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4); t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4);
...@@ -22,7 +24,7 @@ __forceinline uint32_t interleave_uint32(uint32_t x) { ...@@ -22,7 +24,7 @@ __forceinline uint32_t interleave_uint32(uint32_t x) {
} }
/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ /* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
__forceinline uint64_t deinterleave32(uint64_t in) { forceinline uint64_t deinterleave32(uint64_t in) {
uint32_t hi = in >> 32; uint32_t hi = in >> 32;
uint32_t lo = in; uint32_t lo = in;
uint32_t r0, r1; uint32_t r0, r1;
...@@ -34,7 +36,7 @@ __forceinline uint64_t deinterleave32(uint64_t in) { ...@@ -34,7 +36,7 @@ __forceinline uint64_t deinterleave32(uint64_t in) {
} }
/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ /* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
__forceinline uint64_t interleave32(uint64_t in) { forceinline uint64_t interleave32(uint64_t in) {
uint32_t r0 = in; uint32_t r0 = in;
uint32_t r1 = in >> 32; uint32_t r1 = in >> 32;
uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16);
......
#include "permutations.h" #include "permutations.h"
#include "round.h" #if !ASCON_UNROLL_LOOPS
#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM
const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9}, const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9},
{0x6, 0xc}, {0x3, 0xc}, {0x6, 0x9}, {0x3, 0x9}, {0x6, 0xc}, {0x3, 0xc}, {0x6, 0x9}, {0x3, 0x9},
...@@ -10,35 +8,16 @@ const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9}, ...@@ -10,35 +8,16 @@ const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9},
#endif #endif
#if ASCON_INLINE_PERM #if !ASCON_INLINE_PERM && ASCON_UNROLL_LOOPS
#elif ASCON_SINGLE_PERM
void P(state_t* s, uint8_t rounds) {
printstate(" permutation input", s);
for (int i = START(rounds); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
}
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ void P12(state_t* s) { P12ROUNDS(s); }
void P8(state_t* s) { P8ROUNDS(s); }
void P6(state_t* s) { P6ROUNDS(s); }
void P12(state_t* s) {
printstate(" permutation input", s);
P12ROUNDS(s);
}
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16
void P8(state_t* s) {
printstate(" permutation input", s);
P8ROUNDS(s);
}
#endif #endif
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 #if !ASCON_INLINE_PERM && !ASCON_UNROLL_LOOPS
void P6(state_t* s) {
printstate(" permutation input", s); void P(state_t* s, int nr) { PROUNDS(s, nr); }
P6ROUNDS(s);
}
#endif
#endif #endif
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
#include "api.h" #include "api.h"
#include "ascon.h" #include "ascon.h"
#include "config.h"
#include "printstate.h" #include "printstate.h"
#include "round.h" #include "round.h"
...@@ -14,154 +15,124 @@ ...@@ -14,154 +15,124 @@
#define ASCON_128_RATE 8 #define ASCON_128_RATE 8
#define ASCON_128A_RATE 16 #define ASCON_128A_RATE 16
#define ASCON_HASH_RATE 8
#define ASCON_128_PA_ROUNDS 12 #define ASCON_128_PA_ROUNDS 12
#define ASCON_128_PB_ROUNDS 6 #define ASCON_128_PB_ROUNDS 6
#define ASCON_128A_PA_ROUNDS 12
#define ASCON_128A_PB_ROUNDS 8 #define ASCON_128A_PB_ROUNDS 8
#define ASCON_HASH_BYTES 32 #define ASCON_HASH_PA_ROUNDS 12
#define ASCON_HASH_PB_ROUNDS 12
#define ASCON_128_IV WORD_T(0x8021000008220000) #define ASCON_HASHA_PA_ROUNDS 12
#define ASCON_128A_IV WORD_T(0x8822000000200000) #define ASCON_HASHA_PB_ROUNDS 8
#define ASCON_80PQ_IV WORD_T(0xc021000008220000)
#define ASCON_HASH_IV WORD_T(0x0020000008020010)
#define ASCON_XOF_IV WORD_T(0x0020000008020000)
#define ASCON_HASH_IV0 WORD_T(0xf9afb5c6a540dbc7)
#define ASCON_HASH_IV1 WORD_T(0xbd2493011445a340)
#define ASCON_HASH_IV2 WORD_T(0xcb9ba8b5604d4fc8)
#define ASCON_HASH_IV3 WORD_T(0x12a4eede94514c98)
#define ASCON_HASH_IV4 WORD_T(0x4bca84c06339f398)
#define ASCON_XOF_IV0 WORD_T(0xc75782817e351ae6)
#define ASCON_XOF_IV1 WORD_T(0x70045f441d238220)
#define ASCON_XOF_IV2 WORD_T(0x5dd5ab52a13e3f04)
#define ASCON_XOF_IV3 WORD_T(0x3e378142c30c1db2)
#define ASCON_XOF_IV4 WORD_T(0x3735189db624d656)
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16
#define IV ASCON_128_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 6
#define PB P6
#endif
#if ASCON_RATE == 16 #define ASCON_HASH_BYTES 32
#define IV ASCON_128A_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 8
#define PB P8
#endif
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20 #define ASCON_128_IV WORD_T(0x8021000008220000ull)
#define IV ASCON_80PQ_IV #define ASCON_128A_IV WORD_T(0x8822000000200000ull)
#define PA_ROUNDS 12 #define ASCON_80PQ_IV WORD_T(0xc021000008220000ull)
#define PB_ROUNDS 6 #define ASCON_HASH_IV WORD_T(0x0020000008020010ull)
#define PB P6 #define ASCON_XOF_IV WORD_T(0x0020000008020000ull)
#endif
#define ASCON_HASH_IV0 WORD_T(0xf9afb5c6a540dbc7ull)
#define ASCON_HASH_IV1 WORD_T(0xbd2493011445a340ull)
#define ASCON_HASH_IV2 WORD_T(0xcb9ba8b5604d4fc8ull)
#define ASCON_HASH_IV3 WORD_T(0x12a4eede94514c98ull)
#define ASCON_HASH_IV4 WORD_T(0x4bca84c06339f398ull)
#define ASCON_HASHA_IV0 WORD_T(0x0108e46d1b16eb02ull)
#define ASCON_HASHA_IV1 WORD_T(0x5b9b8efdd29083f3ull)
#define ASCON_HASHA_IV2 WORD_T(0x7ad665622891ae4aull)
#define ASCON_HASHA_IV3 WORD_T(0x9dc27156ee3bfc7full)
#define ASCON_HASHA_IV4 WORD_T(0xc61d5fa916801633ull)
#define ASCON_XOF_IV0 WORD_T(0xc75782817e351ae6ull)
#define ASCON_XOF_IV1 WORD_T(0x70045f441d238220ull)
#define ASCON_XOF_IV2 WORD_T(0x5dd5ab52a13e3f04ull)
#define ASCON_XOF_IV3 WORD_T(0x3e378142c30c1db2ull)
#define ASCON_XOF_IV4 WORD_T(0x3735189db624d656ull)
#define ASCON_XOFA_IV0 WORD_T(0x0846d7a5a4b87d44ull)
#define ASCON_XOFA_IV1 WORD_T(0xaa6f1005b3a2dbf4ull)
#define ASCON_XOFA_IV2 WORD_T(0xdc451146f713e811ull)
#define ASCON_XOFA_IV3 WORD_T(0x468cb2532839e30dull)
#define ASCON_XOFA_IV4 WORD_T(0xeb2d429709e96977ull)
#define START(n) (12 - n) #define START(n) (12 - n)
#define RC(e, o) WORD_T((uint64_t)o << 32 | e)
#if ASCON_UNROLL_LOOPS
forceinline void P12ROUNDS(state_t* s) {
__forceinline void P12ROUNDS(state_t* s) { ROUND(s, RC(0xc, 0xc));
ROUND(s, 0xc, 0xc); ROUND(s, RC(0x9, 0xc));
ROUND(s, 0x9, 0xc); ROUND(s, RC(0xc, 0x9));
ROUND(s, 0xc, 0x9); ROUND(s, RC(0x9, 0x9));
ROUND(s, 0x9, 0x9); ROUND(s, RC(0x6, 0xc));
ROUND(s, 0x6, 0xc); ROUND(s, RC(0x3, 0xc));
ROUND(s, 0x3, 0xc); ROUND(s, RC(0x6, 0x9));
ROUND(s, 0x6, 0x9); ROUND(s, RC(0x3, 0x9));
ROUND(s, 0x3, 0x9); ROUND(s, RC(0xc, 0x6));
ROUND(s, 0xc, 0x6); ROUND(s, RC(0x9, 0x6));
ROUND(s, 0x9, 0x6); ROUND(s, RC(0xc, 0x3));
ROUND(s, 0xc, 0x3); ROUND(s, RC(0x9, 0x3));
ROUND(s, 0x9, 0x3);
} }
__forceinline void P8ROUNDS(state_t* s) { forceinline void P8ROUNDS(state_t* s) {
ROUND(s, 0x6, 0xc); ROUND(s, RC(0x6, 0xc));
ROUND(s, 0x3, 0xc); ROUND(s, RC(0x3, 0xc));
ROUND(s, 0x6, 0x9); ROUND(s, RC(0x6, 0x9));
ROUND(s, 0x3, 0x9); ROUND(s, RC(0x3, 0x9));
ROUND(s, 0xc, 0x6); ROUND(s, RC(0xc, 0x6));
ROUND(s, 0x9, 0x6); ROUND(s, RC(0x9, 0x6));
ROUND(s, 0xc, 0x3); ROUND(s, RC(0xc, 0x3));
ROUND(s, 0x9, 0x3); ROUND(s, RC(0x9, 0x3));
} }
__forceinline void P6ROUNDS(state_t* s) { forceinline void P6ROUNDS(state_t* s) {
ROUND(s, 0x6, 0x9); ROUND(s, RC(0x6, 0x9));
ROUND(s, 0x3, 0x9); ROUND(s, RC(0x3, 0x9));
ROUND(s, 0xc, 0x6); ROUND(s, RC(0xc, 0x6));
ROUND(s, 0x9, 0x6); ROUND(s, RC(0x9, 0x6));
ROUND(s, 0xc, 0x3); ROUND(s, RC(0xc, 0x3));
ROUND(s, 0x9, 0x3); ROUND(s, RC(0x9, 0x3));
} }
#else /* !ASCON_UNROLL_LOOPS */
extern const uint8_t constants[][2]; extern const uint8_t constants[][2];
__forceinline void P12ROUNDS(state_t* s) { forceinline void PROUNDS(state_t* s, int nr) {
for (int i = START(12); i < 12; i++) for (int i = START(nr); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]); ROUND(s, RC(constants[i][0], constants[i][1]));
} }
__forceinline void P8ROUNDS(state_t* s) { #if ASCON_INLINE_PERM && ASCON_UNROLL_LOOPS
for (int i = START(8); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
}
__forceinline void P6ROUNDS(state_t* s) { forceinline void P(state_t* s, int nr) {
for (int i = START(6); i < 12; i++) if (nr == 12) P12ROUNDS(s);
ROUND(s, constants[i][0], constants[i][1]); if (nr == 8) P8ROUNDS(s);
if (nr == 6) P6ROUNDS(s);
} }
#endif #elif !ASCON_INLINE_PERM && ASCON_UNROLL_LOOPS
#if ASCON_INLINE_PERM
__forceinline void P12(state_t* s) {
printstate(" permutation input", s);
P12ROUNDS(s);
}
__forceinline void P8(state_t* s) { void P12(state_t* s);
printstate(" permutation input", s); void P8(state_t* s);
P8ROUNDS(s); void P6(state_t* s);
}
__forceinline void P6(state_t* s) {
printstate(" permutation input", s);
P6ROUNDS(s);
}
__forceinline void P(state_t* s, int i) { forceinline void P(state_t* s, int nr) {
if (i == 12) P12(s); if (nr == 12) P12(s);
if (i == 8) P8(s); if (nr == 8) P8(s);
if (i == 6) P6(s); if (nr == 6) P6(s);
} }
#elif ASCON_SINGLE_PERM #elif ASCON_INLINE_PERM && !ASCON_UNROLL_LOOPS
#define P12(s) P(s, 12) forceinline void P(state_t* s, int nr) { PROUNDS(s, nr); }
#define P8(s) P(s, 8)
#define P6(s) P(s, 6)
void P(state_t* s, uint8_t rounds); #else /* !ASCON_INLINE_PERM && !ASCON_UNROLL_LOOPS */
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ void P(state_t* s, int nr);
void P12(state_t* s);
void P8(state_t* s);
void P6(state_t* s);
__forceinline void P(state_t* s, int i) {
if (i == 12) P12(s);
if (i == 8) P8(s);
if (i == 6) P6(s);
}
#endif #endif
......
#ifndef PRINTSTATE_H_ #ifndef PRINTSTATE_H_
#define PRINTSTATE_H_ #define PRINTSTATE_H_
#ifdef NDEBUG #ifdef ASCON_PRINTSTATE
#define printword(text, w) #include "ascon.h"
#define printstate(text, s) #include "word.h"
#else void printword(const char* text, const word_t x);
void printstate(const char* text, const state_t* s);
#include <inttypes.h> #else
#include <stdio.h>
#include "ascon.h" #define printword(text, w) \
#include "word.h" do { \
} while (0)
__forceinline void printword(const char* text, const word_t x) { #define printstate(text, s) \
printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); do { \
} } while (0)
__forceinline void printstate(const char* text, const state_t* s) {
printf("%s:\n", text);
printword(" x0", s->x0);
printword(" x1", s->x1);
printword(" x2", s->x2);
printword(" x3", s->x3);
printword(" x4", s->x4);
}
#endif #endif
......
...@@ -4,13 +4,13 @@ ...@@ -4,13 +4,13 @@
#include "ascon.h" #include "ascon.h"
#include "printstate.h" #include "printstate.h"
__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) {
*K0 = WORD_T(0); *K0 = WORD_T(0);
*K1 = WORD_T(0); *K1 = WORD_T(0);
*K2 = WORD_T(0); *K2 = WORD_T(0);
} }
__forceinline void PINIT(state_t* s) { forceinline void PINIT(state_t* s) {
s->x0 = WORD_T(0); s->x0 = WORD_T(0);
s->x1 = WORD_T(0); s->x1 = WORD_T(0);
s->x2 = WORD_T(0); s->x2 = WORD_T(0);
...@@ -18,67 +18,34 @@ __forceinline void PINIT(state_t* s) { ...@@ -18,67 +18,34 @@ __forceinline void PINIT(state_t* s) {
s->x4 = WORD_T(0); s->x4 = WORD_T(0);
} }
__forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) { forceinline void ROUND(state_t* s, word_t C) {
state_t t; state_t t;
/* round constant */ /* round constant */
s->x2.e ^= C_e; s->x2 = XOR(s->x2, C);
s->x2.o ^= C_o;
/* s-box layer */ /* s-box layer */
s->x0.e ^= s->x4.e; s->x0 = XOR(s->x0, s->x4);
s->x0.o ^= s->x4.o; s->x4 = XOR(s->x4, s->x3);
s->x4.e ^= s->x3.e; s->x2 = XOR(s->x2, s->x1);
s->x4.o ^= s->x3.o; t.x0 = XOR(s->x0, AND(NOT(s->x1), s->x2));
s->x2.e ^= s->x1.e; t.x2 = XOR(s->x2, AND(NOT(s->x3), s->x4));
s->x2.o ^= s->x1.o; t.x4 = XOR(s->x4, AND(NOT(s->x0), s->x1));
t.x0.e = s->x0.e; t.x1 = XOR(s->x1, AND(NOT(s->x2), s->x3));
t.x0.o = s->x0.o; t.x3 = XOR(s->x3, AND(NOT(s->x4), s->x0));
t.x4.e = s->x4.e; t.x1 = XOR(t.x1, t.x0);
t.x4.o = s->x4.o; t.x3 = XOR(t.x3, t.x2);
t.x3.e = s->x3.e; t.x0 = XOR(t.x0, t.x4);
t.x3.o = s->x3.o;
t.x1.e = s->x1.e;
t.x1.o = s->x1.o;
t.x2.e = s->x2.e;
t.x2.o = s->x2.o;
s->x0.e = t.x0.e ^ (~t.x1.e & t.x2.e);
s->x0.o = t.x0.o ^ (~t.x1.o & t.x2.o);
s->x2.e = t.x2.e ^ (~t.x3.e & t.x4.e);
s->x2.o = t.x2.o ^ (~t.x3.o & t.x4.o);
s->x4.e = t.x4.e ^ (~t.x0.e & t.x1.e);
s->x4.o = t.x4.o ^ (~t.x0.o & t.x1.o);
s->x1.e = t.x1.e ^ (~t.x2.e & t.x3.e);
s->x1.o = t.x1.o ^ (~t.x2.o & t.x3.o);
s->x3.e = t.x3.e ^ (~t.x4.e & t.x0.e);
s->x3.o = t.x3.o ^ (~t.x4.o & t.x0.o);
s->x1.e ^= s->x0.e;
s->x1.o ^= s->x0.o;
s->x3.e ^= s->x2.e;
s->x3.o ^= s->x2.o;
s->x0.e ^= s->x4.e;
s->x0.o ^= s->x4.o;
/* linear layer */ /* linear layer */
t.x0.e = s->x0.e ^ ROR32(s->x0.o, 4); s->x2 = XOR(t.x2, ROR(t.x2, 6 - 1));
t.x0.o = s->x0.o ^ ROR32(s->x0.e, 5); s->x3 = XOR(t.x3, ROR(t.x3, 17 - 10));
t.x1.e = s->x1.e ^ ROR32(s->x1.e, 11); s->x4 = XOR(t.x4, ROR(t.x4, 41 - 7));
t.x1.o = s->x1.o ^ ROR32(s->x1.o, 11); s->x0 = XOR(t.x0, ROR(t.x0, 28 - 19));
t.x2.e = s->x2.e ^ ROR32(s->x2.o, 2); s->x1 = XOR(t.x1, ROR(t.x1, 61 - 39));
t.x2.o = s->x2.o ^ ROR32(s->x2.e, 3); s->x2 = XOR(t.x2, ROR(s->x2, 1));
t.x3.e = s->x3.e ^ ROR32(s->x3.o, 3); s->x3 = XOR(t.x3, ROR(s->x3, 10));
t.x3.o = s->x3.o ^ ROR32(s->x3.e, 4); s->x4 = XOR(t.x4, ROR(s->x4, 7));
t.x4.e = s->x4.e ^ ROR32(s->x4.e, 17); s->x0 = XOR(t.x0, ROR(s->x0, 19));
t.x4.o = s->x4.o ^ ROR32(s->x4.o, 17); s->x1 = XOR(t.x1, ROR(s->x1, 39));
s->x0.e ^= ROR32(t.x0.o, 9); s->x2 = NOT(s->x2);
s->x0.o ^= ROR32(t.x0.e, 10);
s->x1.e ^= ROR32(t.x1.o, 19);
s->x1.o ^= ROR32(t.x1.e, 20);
s->x2.e ^= t.x2.o;
s->x2.o ^= ROR32(t.x2.e, 1);
s->x3.e ^= ROR32(t.x3.e, 5);
s->x3.o ^= ROR32(t.x3.o, 5);
s->x4.e ^= ROR32(t.x4.o, 3);
s->x4.o ^= ROR32(t.x4.e, 4);
s->x2.e = ~s->x2.e;
s->x2.o = ~s->x2.o;
printstate(" round output", s); printstate(" round output", s);
} }
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
#include <stdint.h> #include <stdint.h>
#include "endian.h" #include "endian.h"
#include "forceinline.h"
#include "interleave.h" #include "interleave.h"
typedef struct { typedef struct {
...@@ -11,102 +12,92 @@ typedef struct { ...@@ -11,102 +12,92 @@ typedef struct {
uint32_t o; uint32_t o;
} word_t; } word_t;
__forceinline uint32_t ROR32(uint32_t x, int n) { forceinline uint32_t ROR32(uint32_t x, int n) {
return (n == 0) ? x : x >> n | x << (32 - n); return (n == 0) ? x : x >> n | x << (32 - n);
} }
__forceinline word_t ROR64(word_t x, int n) { forceinline word_t ROR(word_t x, int n) {
word_t r; word_t r;
r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2);
r.o = (n % 2) ? ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); r.o = (n % 2) ? ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2);
return r; return r;
} }
__forceinline word_t WORD_T(uint64_t x) { forceinline word_t WORD_T(uint64_t x) { return (word_t){.o = x >> 32, .e = x}; }
return (word_t){.o = x >> 32, .e = x};
}
__forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; } forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; }
__forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(deinterleave32(x)); } forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(deinterleave32(x)); }
__forceinline uint64_t WORDTOU64(word_t w) { return interleave32(UINT64_T(w)); } forceinline uint64_t WORDTOU64(word_t w) { return interleave32(UINT64_T(w)); }
__forceinline word_t NOT(word_t a) { forceinline word_t NOT(word_t a) {
a.e = ~a.e; a.e = ~a.e;
a.o = ~a.o; a.o = ~a.o;
return a; return a;
} }
__forceinline word_t XOR(word_t a, word_t b) { forceinline word_t XOR(word_t a, word_t b) {
a.e ^= b.e; a.e ^= b.e;
a.o ^= b.o; a.o ^= b.o;
return a; return a;
} }
__forceinline word_t AND(word_t a, word_t b) { forceinline word_t AND(word_t a, word_t b) {
a.e &= b.e; a.e &= b.e;
a.o &= b.o; a.o &= b.o;
return a; return a;
} }
__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) {
word_t r; word_t r;
r.o = lo2hi.o << 16 | hi2lo.o >> 16;
r.e = lo2hi.e << 16 | hi2lo.e >> 16; r.e = lo2hi.e << 16 | hi2lo.e >> 16;
r.o = lo2hi.o << 16 | hi2lo.o >> 16;
return r; return r;
} }
__forceinline uint8_t NOTZERO(word_t a, word_t b) { forceinline int NOTZERO(word_t a, word_t b) {
uint32_t result = a.e | a.o | b.e | b.o; uint32_t result = a.e | a.o | b.e | b.o;
result |= result >> 16; result |= result >> 16;
result |= result >> 8; result |= result >> 8;
return (uint8_t)result; return ((((int)(result & 0xff) - 1) >> 8) & 1) - 1;
} }
__forceinline word_t PAD(int i) { forceinline word_t PAD(int i) {
return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32); return WORD_T((uint64_t)(0x8ul << (28 - 4 * i)) << 32);
} }
__forceinline word_t CLEAR(word_t w, int n) { forceinline word_t CLEAR(word_t w, int n) {
/* undefined for n == 0 */ /* undefined for n == 0 */
uint32_t mask = 0x0fffffff >> (n * 4 - 4); uint32_t mask = 0x0fffffff >> (n * 4 - 4);
return AND(w, WORD_T((uint64_t)mask << 32 | mask)); w.e &= mask;
w.o &= mask;
return w;
} }
__forceinline uint64_t MASK(int n) { forceinline uint64_t MASK(int n) {
/* undefined for n == 0 */ /* undefined for n == 0 */
return ~0ull >> (64 - 8 * n); return ~0ull >> (64 - 8 * n);
} }
__forceinline word_t LOAD64(const uint8_t* bytes) { forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = *(uint64_t*)bytes;
return U64TOWORD(U64BIG(x));
}
__forceinline void STORE64(uint8_t* bytes, word_t w) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes = U64BIG(x);
}
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = *(uint64_t*)bytes & MASK(n); uint64_t x = *(uint64_t*)bytes & MASK(n);
return U64TOWORD(U64BIG(x)); return U64TOWORD(U64BIG(x));
} }
__forceinline void STORE(uint8_t* bytes, word_t w, int n) { forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w); uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes &= ~MASK(n); *(uint64_t*)bytes &= ~MASK(n);
*(uint64_t*)bytes |= U64BIG(x); *(uint64_t*)bytes |= U64BIG(x);
} }
__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) { forceinline word_t LOADBYTES(const uint8_t* bytes, int n) {
uint64_t x = 0; uint64_t x = 0;
for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i]; for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i];
return U64TOWORD(x); return U64TOWORD(x);
} }
__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) { forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w); uint64_t x = WORDTOU64(w);
for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i]; for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i];
} }
......
#define CRYPTO_VERSION "1.2.5"
#define CRYPTO_KEYBYTES 16 #define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0 #define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16 #define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16 #define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1 #define CRYPTO_NOOVERLAP 1
#define ASCON_RATE 16 #define ASCON_AEAD_RATE 16
...@@ -3,15 +3,14 @@ ...@@ -3,15 +3,14 @@
#include <stdint.h> #include <stdint.h>
#include "config.h"
#include "word.h" #include "word.h"
typedef struct { typedef struct {
word_t x0, x1, x2, x3, x4; word_t x0, x1, x2, x3, x4;
} state_t; } state_t;
void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k); void ascon_aeadinit(state_t* s, const uint8_t* npub, const uint8_t* k);
void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen); void ascon_adata(state_t* s, const uint8_t* ad, uint64_t adlen);
void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen);
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen);
void ascon_final(state_t* s, const uint8_t* k); void ascon_final(state_t* s, const uint8_t* k);
......
#ifndef CONFIG_H_ #ifndef CONFIG_H_
#define CONFIG_H_ #define CONFIG_H_
/* inline the Ascon mode */ /* inline the ascon mode */
#ifndef ASCON_INLINE_MODE #ifndef ASCON_INLINE_MODE
#define ASCON_INLINE_MODE 1 #define ASCON_INLINE_MODE 0
#endif #endif
/* inline the Ascon permutations */ /* inline all permutations */
#ifndef ASCON_INLINE_PERM #ifndef ASCON_INLINE_PERM
#define ASCON_INLINE_PERM 0 #define ASCON_INLINE_PERM 0
#endif #endif
/* single function for all permutations */ /* unroll permutation loops */
#ifndef ASCON_SINGLE_PERM
#define ASCON_SINGLE_PERM 0
#endif
/* unroll the permutation loops */
#ifndef ASCON_UNROLL_LOOPS #ifndef ASCON_UNROLL_LOOPS
#define ASCON_UNROLL_LOOPS 1 #define ASCON_UNROLL_LOOPS 1
#endif #endif
/* make sure __forceinline is supported */
#ifndef __forceinline
#define __forceinline inline __attribute__((always_inline))
#endif
#endif /* CONFIG_H_ */ #endif /* CONFIG_H_ */
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
/* macros for big endian machines */ /* macros for big endian machines */
#ifndef NDEBUG #ifdef PRAGMA_ENDIAN
#pragma message("Using macros for big endian machines") #pragma message("Using macros for big endian machines")
#endif #endif
#define U64BIG(x) (x) #define U64BIG(x) (x)
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
/* macros for little endian machines */ /* macros for little endian machines */
#ifndef NDEBUG #ifdef PRAGMA_ENDIAN
#pragma message("Using macros for little endian machines") #pragma message("Using macros for little endian machines")
#endif #endif
#define U64BIG(x) \ #define U64BIG(x) \
......
...@@ -3,7 +3,9 @@ ...@@ -3,7 +3,9 @@
#include <stdint.h> #include <stdint.h>
__forceinline uint32_t deinterleave_uint32(uint32_t x) { #include "forceinline.h"
forceinline uint32_t deinterleave_uint32(uint32_t x) {
uint32_t t; uint32_t t;
t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1); t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1);
t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2); t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2);
...@@ -12,7 +14,7 @@ __forceinline uint32_t deinterleave_uint32(uint32_t x) { ...@@ -12,7 +14,7 @@ __forceinline uint32_t deinterleave_uint32(uint32_t x) {
return x; return x;
} }
__forceinline uint32_t interleave_uint32(uint32_t x) { forceinline uint32_t interleave_uint32(uint32_t x) {
uint32_t t; uint32_t t;
t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8); t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8);
t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4); t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4);
...@@ -22,7 +24,7 @@ __forceinline uint32_t interleave_uint32(uint32_t x) { ...@@ -22,7 +24,7 @@ __forceinline uint32_t interleave_uint32(uint32_t x) {
} }
/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ /* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
__forceinline uint64_t deinterleave32(uint64_t in) { forceinline uint64_t deinterleave32(uint64_t in) {
uint32_t hi = in >> 32; uint32_t hi = in >> 32;
uint32_t lo = in; uint32_t lo = in;
uint32_t r0, r1; uint32_t r0, r1;
...@@ -34,7 +36,7 @@ __forceinline uint64_t deinterleave32(uint64_t in) { ...@@ -34,7 +36,7 @@ __forceinline uint64_t deinterleave32(uint64_t in) {
} }
/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ /* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
__forceinline uint64_t interleave32(uint64_t in) { forceinline uint64_t interleave32(uint64_t in) {
uint32_t r0 = in; uint32_t r0 = in;
uint32_t r1 = in >> 32; uint32_t r1 = in >> 32;
uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16);
......
#include "permutations.h" #include "permutations.h"
#include "round.h" #if !ASCON_UNROLL_LOOPS
#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM
const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9}, const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9},
{0x6, 0xc}, {0x3, 0xc}, {0x6, 0x9}, {0x3, 0x9}, {0x6, 0xc}, {0x3, 0xc}, {0x6, 0x9}, {0x3, 0x9},
...@@ -10,35 +8,16 @@ const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9}, ...@@ -10,35 +8,16 @@ const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9},
#endif #endif
#if ASCON_INLINE_PERM #if !ASCON_INLINE_PERM && ASCON_UNROLL_LOOPS
#elif ASCON_SINGLE_PERM
void P(state_t* s, uint8_t rounds) {
printstate(" permutation input", s);
for (int i = START(rounds); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
}
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ void P12(state_t* s) { P12ROUNDS(s); }
void P8(state_t* s) { P8ROUNDS(s); }
void P6(state_t* s) { P6ROUNDS(s); }
void P12(state_t* s) {
printstate(" permutation input", s);
P12ROUNDS(s);
}
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16
void P8(state_t* s) {
printstate(" permutation input", s);
P8ROUNDS(s);
}
#endif #endif
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 #if !ASCON_INLINE_PERM && !ASCON_UNROLL_LOOPS
void P6(state_t* s) {
printstate(" permutation input", s); void P(state_t* s, int nr) { PROUNDS(s, nr); }
P6ROUNDS(s);
}
#endif
#endif #endif
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
#include "api.h" #include "api.h"
#include "ascon.h" #include "ascon.h"
#include "config.h"
#include "printstate.h" #include "printstate.h"
#include "round.h" #include "round.h"
...@@ -14,154 +15,124 @@ ...@@ -14,154 +15,124 @@
#define ASCON_128_RATE 8 #define ASCON_128_RATE 8
#define ASCON_128A_RATE 16 #define ASCON_128A_RATE 16
#define ASCON_HASH_RATE 8
#define ASCON_128_PA_ROUNDS 12 #define ASCON_128_PA_ROUNDS 12
#define ASCON_128_PB_ROUNDS 6 #define ASCON_128_PB_ROUNDS 6
#define ASCON_128A_PA_ROUNDS 12
#define ASCON_128A_PB_ROUNDS 8 #define ASCON_128A_PB_ROUNDS 8
#define ASCON_HASH_BYTES 32 #define ASCON_HASH_PA_ROUNDS 12
#define ASCON_HASH_PB_ROUNDS 12
#define ASCON_128_IV WORD_T(0x8021000008220000) #define ASCON_HASHA_PA_ROUNDS 12
#define ASCON_128A_IV WORD_T(0x8822000000200000) #define ASCON_HASHA_PB_ROUNDS 8
#define ASCON_80PQ_IV WORD_T(0xc021000008220000)
#define ASCON_HASH_IV WORD_T(0x0020000008020010)
#define ASCON_XOF_IV WORD_T(0x0020000008020000)
#define ASCON_HASH_IV0 WORD_T(0xf9afb5c6a540dbc7)
#define ASCON_HASH_IV1 WORD_T(0xbd2493011445a340)
#define ASCON_HASH_IV2 WORD_T(0xcb9ba8b5604d4fc8)
#define ASCON_HASH_IV3 WORD_T(0x12a4eede94514c98)
#define ASCON_HASH_IV4 WORD_T(0x4bca84c06339f398)
#define ASCON_XOF_IV0 WORD_T(0xc75782817e351ae6)
#define ASCON_XOF_IV1 WORD_T(0x70045f441d238220)
#define ASCON_XOF_IV2 WORD_T(0x5dd5ab52a13e3f04)
#define ASCON_XOF_IV3 WORD_T(0x3e378142c30c1db2)
#define ASCON_XOF_IV4 WORD_T(0x3735189db624d656)
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16
#define IV ASCON_128_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 6
#define PB P6
#endif
#if ASCON_RATE == 16 #define ASCON_HASH_BYTES 32
#define IV ASCON_128A_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 8
#define PB P8
#endif
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20 #define ASCON_128_IV WORD_T(0x8021000008220000ull)
#define IV ASCON_80PQ_IV #define ASCON_128A_IV WORD_T(0x8822000000200000ull)
#define PA_ROUNDS 12 #define ASCON_80PQ_IV WORD_T(0xc021000008220000ull)
#define PB_ROUNDS 6 #define ASCON_HASH_IV WORD_T(0x0020000008020010ull)
#define PB P6 #define ASCON_XOF_IV WORD_T(0x0020000008020000ull)
#endif
#define ASCON_HASH_IV0 WORD_T(0xf9afb5c6a540dbc7ull)
#define ASCON_HASH_IV1 WORD_T(0xbd2493011445a340ull)
#define ASCON_HASH_IV2 WORD_T(0xcb9ba8b5604d4fc8ull)
#define ASCON_HASH_IV3 WORD_T(0x12a4eede94514c98ull)
#define ASCON_HASH_IV4 WORD_T(0x4bca84c06339f398ull)
#define ASCON_HASHA_IV0 WORD_T(0x0108e46d1b16eb02ull)
#define ASCON_HASHA_IV1 WORD_T(0x5b9b8efdd29083f3ull)
#define ASCON_HASHA_IV2 WORD_T(0x7ad665622891ae4aull)
#define ASCON_HASHA_IV3 WORD_T(0x9dc27156ee3bfc7full)
#define ASCON_HASHA_IV4 WORD_T(0xc61d5fa916801633ull)
#define ASCON_XOF_IV0 WORD_T(0xc75782817e351ae6ull)
#define ASCON_XOF_IV1 WORD_T(0x70045f441d238220ull)
#define ASCON_XOF_IV2 WORD_T(0x5dd5ab52a13e3f04ull)
#define ASCON_XOF_IV3 WORD_T(0x3e378142c30c1db2ull)
#define ASCON_XOF_IV4 WORD_T(0x3735189db624d656ull)
#define ASCON_XOFA_IV0 WORD_T(0x0846d7a5a4b87d44ull)
#define ASCON_XOFA_IV1 WORD_T(0xaa6f1005b3a2dbf4ull)
#define ASCON_XOFA_IV2 WORD_T(0xdc451146f713e811ull)
#define ASCON_XOFA_IV3 WORD_T(0x468cb2532839e30dull)
#define ASCON_XOFA_IV4 WORD_T(0xeb2d429709e96977ull)
#define START(n) (12 - n) #define START(n) (12 - n)
#define RC(e, o) WORD_T((uint64_t)o << 32 | e)
#if ASCON_UNROLL_LOOPS
forceinline void P12ROUNDS(state_t* s) {
__forceinline void P12ROUNDS(state_t* s) { ROUND(s, RC(0xc, 0xc));
ROUND(s, 0xc, 0xc); ROUND(s, RC(0x9, 0xc));
ROUND(s, 0x9, 0xc); ROUND(s, RC(0xc, 0x9));
ROUND(s, 0xc, 0x9); ROUND(s, RC(0x9, 0x9));
ROUND(s, 0x9, 0x9); ROUND(s, RC(0x6, 0xc));
ROUND(s, 0x6, 0xc); ROUND(s, RC(0x3, 0xc));
ROUND(s, 0x3, 0xc); ROUND(s, RC(0x6, 0x9));
ROUND(s, 0x6, 0x9); ROUND(s, RC(0x3, 0x9));
ROUND(s, 0x3, 0x9); ROUND(s, RC(0xc, 0x6));
ROUND(s, 0xc, 0x6); ROUND(s, RC(0x9, 0x6));
ROUND(s, 0x9, 0x6); ROUND(s, RC(0xc, 0x3));
ROUND(s, 0xc, 0x3); ROUND(s, RC(0x9, 0x3));
ROUND(s, 0x9, 0x3);
} }
__forceinline void P8ROUNDS(state_t* s) { forceinline void P8ROUNDS(state_t* s) {
ROUND(s, 0x6, 0xc); ROUND(s, RC(0x6, 0xc));
ROUND(s, 0x3, 0xc); ROUND(s, RC(0x3, 0xc));
ROUND(s, 0x6, 0x9); ROUND(s, RC(0x6, 0x9));
ROUND(s, 0x3, 0x9); ROUND(s, RC(0x3, 0x9));
ROUND(s, 0xc, 0x6); ROUND(s, RC(0xc, 0x6));
ROUND(s, 0x9, 0x6); ROUND(s, RC(0x9, 0x6));
ROUND(s, 0xc, 0x3); ROUND(s, RC(0xc, 0x3));
ROUND(s, 0x9, 0x3); ROUND(s, RC(0x9, 0x3));
} }
__forceinline void P6ROUNDS(state_t* s) { forceinline void P6ROUNDS(state_t* s) {
ROUND(s, 0x6, 0x9); ROUND(s, RC(0x6, 0x9));
ROUND(s, 0x3, 0x9); ROUND(s, RC(0x3, 0x9));
ROUND(s, 0xc, 0x6); ROUND(s, RC(0xc, 0x6));
ROUND(s, 0x9, 0x6); ROUND(s, RC(0x9, 0x6));
ROUND(s, 0xc, 0x3); ROUND(s, RC(0xc, 0x3));
ROUND(s, 0x9, 0x3); ROUND(s, RC(0x9, 0x3));
} }
#else /* !ASCON_UNROLL_LOOPS */
extern const uint8_t constants[][2]; extern const uint8_t constants[][2];
__forceinline void P12ROUNDS(state_t* s) { forceinline void PROUNDS(state_t* s, int nr) {
for (int i = START(12); i < 12; i++) for (int i = START(nr); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]); ROUND(s, RC(constants[i][0], constants[i][1]));
} }
__forceinline void P8ROUNDS(state_t* s) { #if ASCON_INLINE_PERM && ASCON_UNROLL_LOOPS
for (int i = START(8); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
}
__forceinline void P6ROUNDS(state_t* s) { forceinline void P(state_t* s, int nr) {
for (int i = START(6); i < 12; i++) if (nr == 12) P12ROUNDS(s);
ROUND(s, constants[i][0], constants[i][1]); if (nr == 8) P8ROUNDS(s);
if (nr == 6) P6ROUNDS(s);
} }
#endif #elif !ASCON_INLINE_PERM && ASCON_UNROLL_LOOPS
#if ASCON_INLINE_PERM
__forceinline void P12(state_t* s) {
printstate(" permutation input", s);
P12ROUNDS(s);
}
__forceinline void P8(state_t* s) { void P12(state_t* s);
printstate(" permutation input", s); void P8(state_t* s);
P8ROUNDS(s); void P6(state_t* s);
}
__forceinline void P6(state_t* s) {
printstate(" permutation input", s);
P6ROUNDS(s);
}
__forceinline void P(state_t* s, int i) { forceinline void P(state_t* s, int nr) {
if (i == 12) P12(s); if (nr == 12) P12(s);
if (i == 8) P8(s); if (nr == 8) P8(s);
if (i == 6) P6(s); if (nr == 6) P6(s);
} }
#elif ASCON_SINGLE_PERM #elif ASCON_INLINE_PERM && !ASCON_UNROLL_LOOPS
#define P12(s) P(s, 12) forceinline void P(state_t* s, int nr) { PROUNDS(s, nr); }
#define P8(s) P(s, 8)
#define P6(s) P(s, 6)
void P(state_t* s, uint8_t rounds); #else /* !ASCON_INLINE_PERM && !ASCON_UNROLL_LOOPS */
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ void P(state_t* s, int nr);
void P12(state_t* s);
void P8(state_t* s);
void P6(state_t* s);
__forceinline void P(state_t* s, int i) {
if (i == 12) P12(s);
if (i == 8) P8(s);
if (i == 6) P6(s);
}
#endif #endif
......
#ifndef PRINTSTATE_H_ #ifndef PRINTSTATE_H_
#define PRINTSTATE_H_ #define PRINTSTATE_H_
#ifdef NDEBUG #ifdef ASCON_PRINTSTATE
#define printword(text, w) #include "ascon.h"
#define printstate(text, s) #include "word.h"
#else void printword(const char* text, const word_t x);
void printstate(const char* text, const state_t* s);
#include <inttypes.h> #else
#include <stdio.h>
#include "ascon.h" #define printword(text, w) \
#include "word.h" do { \
} while (0)
__forceinline void printword(const char* text, const word_t x) { #define printstate(text, s) \
printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); do { \
} } while (0)
__forceinline void printstate(const char* text, const state_t* s) {
printf("%s:\n", text);
printword(" x0", s->x0);
printword(" x1", s->x1);
printword(" x2", s->x2);
printword(" x3", s->x3);
printword(" x4", s->x4);
}
#endif #endif
......
...@@ -4,13 +4,13 @@ ...@@ -4,13 +4,13 @@
#include "ascon.h" #include "ascon.h"
#include "printstate.h" #include "printstate.h"
__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) {
*K0 = WORD_T(0); *K0 = WORD_T(0);
*K1 = WORD_T(0); *K1 = WORD_T(0);
*K2 = WORD_T(0); *K2 = WORD_T(0);
} }
__forceinline void PINIT(state_t* s) { forceinline void PINIT(state_t* s) {
s->x0 = WORD_T(0); s->x0 = WORD_T(0);
s->x1 = WORD_T(0); s->x1 = WORD_T(0);
s->x2 = WORD_T(0); s->x2 = WORD_T(0);
...@@ -18,7 +18,7 @@ __forceinline void PINIT(state_t* s) { ...@@ -18,7 +18,7 @@ __forceinline void PINIT(state_t* s) {
s->x4 = WORD_T(0); s->x4 = WORD_T(0);
} }
__forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) { forceinline void ROUND(state_t* s, word_t C) {
uint32_t tmp0, tmp1, tmp2, tmp3; uint32_t tmp0, tmp1, tmp2, tmp3;
/* clang-format off */ /* clang-format off */
__asm__ __volatile__( \ __asm__ __volatile__( \
...@@ -92,8 +92,8 @@ __forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) { ...@@ -92,8 +92,8 @@ __forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) {
[ tmp1 ] "=r"(tmp1), \ [ tmp1 ] "=r"(tmp1), \
[ tmp2 ] "=r"(tmp2), \ [ tmp2 ] "=r"(tmp2), \
[ tmp3 ] "=r"(tmp3) \ [ tmp3 ] "=r"(tmp3) \
: [ C_e ] "i"(C_e), \ : [ C_e ] "ri"(C.e), \
[ C_o ] "i"(C_o) \ [ C_o ] "ri"(C.o) \
: ); : );
/* clang-format on */ /* clang-format on */
printstate(" round output", s); printstate(" round output", s);
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
#include <stdint.h> #include <stdint.h>
#include "endian.h" #include "endian.h"
#include "forceinline.h"
#include "interleave.h" #include "interleave.h"
typedef struct { typedef struct {
...@@ -11,102 +12,92 @@ typedef struct { ...@@ -11,102 +12,92 @@ typedef struct {
uint32_t o; uint32_t o;
} word_t; } word_t;
__forceinline uint32_t ROR32(uint32_t x, int n) { forceinline uint32_t ROR32(uint32_t x, int n) {
return (n == 0) ? x : x >> n | x << (32 - n); return (n == 0) ? x : x >> n | x << (32 - n);
} }
__forceinline word_t ROR64(word_t x, int n) { forceinline word_t ROR(word_t x, int n) {
word_t r; word_t r;
r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2);
r.o = (n % 2) ? ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); r.o = (n % 2) ? ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2);
return r; return r;
} }
__forceinline word_t WORD_T(uint64_t x) { forceinline word_t WORD_T(uint64_t x) { return (word_t){.o = x >> 32, .e = x}; }
return (word_t){.o = x >> 32, .e = x};
}
__forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; } forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; }
__forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(deinterleave32(x)); } forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(deinterleave32(x)); }
__forceinline uint64_t WORDTOU64(word_t w) { return interleave32(UINT64_T(w)); } forceinline uint64_t WORDTOU64(word_t w) { return interleave32(UINT64_T(w)); }
__forceinline word_t NOT(word_t a) { forceinline word_t NOT(word_t a) {
a.e = ~a.e; a.e = ~a.e;
a.o = ~a.o; a.o = ~a.o;
return a; return a;
} }
__forceinline word_t XOR(word_t a, word_t b) { forceinline word_t XOR(word_t a, word_t b) {
a.e ^= b.e; a.e ^= b.e;
a.o ^= b.o; a.o ^= b.o;
return a; return a;
} }
__forceinline word_t AND(word_t a, word_t b) { forceinline word_t AND(word_t a, word_t b) {
a.e &= b.e; a.e &= b.e;
a.o &= b.o; a.o &= b.o;
return a; return a;
} }
__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) {
word_t r; word_t r;
r.o = lo2hi.o << 16 | hi2lo.o >> 16;
r.e = lo2hi.e << 16 | hi2lo.e >> 16; r.e = lo2hi.e << 16 | hi2lo.e >> 16;
r.o = lo2hi.o << 16 | hi2lo.o >> 16;
return r; return r;
} }
__forceinline uint8_t NOTZERO(word_t a, word_t b) { forceinline int NOTZERO(word_t a, word_t b) {
uint32_t result = a.e | a.o | b.e | b.o; uint32_t result = a.e | a.o | b.e | b.o;
result |= result >> 16; result |= result >> 16;
result |= result >> 8; result |= result >> 8;
return (uint8_t)result; return ((((int)(result & 0xff) - 1) >> 8) & 1) - 1;
} }
__forceinline word_t PAD(int i) { forceinline word_t PAD(int i) {
return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32); return WORD_T((uint64_t)(0x8ul << (28 - 4 * i)) << 32);
} }
__forceinline word_t CLEAR(word_t w, int n) { forceinline word_t CLEAR(word_t w, int n) {
/* undefined for n == 0 */ /* undefined for n == 0 */
uint32_t mask = 0x0fffffff >> (n * 4 - 4); uint32_t mask = 0x0fffffff >> (n * 4 - 4);
return AND(w, WORD_T((uint64_t)mask << 32 | mask)); w.e &= mask;
w.o &= mask;
return w;
} }
__forceinline uint64_t MASK(int n) { forceinline uint64_t MASK(int n) {
/* undefined for n == 0 */ /* undefined for n == 0 */
return ~0ull >> (64 - 8 * n); return ~0ull >> (64 - 8 * n);
} }
__forceinline word_t LOAD64(const uint8_t* bytes) { forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = *(uint64_t*)bytes;
return U64TOWORD(U64BIG(x));
}
__forceinline void STORE64(uint8_t* bytes, word_t w) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes = U64BIG(x);
}
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = *(uint64_t*)bytes & MASK(n); uint64_t x = *(uint64_t*)bytes & MASK(n);
return U64TOWORD(U64BIG(x)); return U64TOWORD(U64BIG(x));
} }
__forceinline void STORE(uint8_t* bytes, word_t w, int n) { forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w); uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes &= ~MASK(n); *(uint64_t*)bytes &= ~MASK(n);
*(uint64_t*)bytes |= U64BIG(x); *(uint64_t*)bytes |= U64BIG(x);
} }
__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) { forceinline word_t LOADBYTES(const uint8_t* bytes, int n) {
uint64_t x = 0; uint64_t x = 0;
for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i]; for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i];
return U64TOWORD(x); return U64TOWORD(x);
} }
__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) { forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w); uint64_t x = WORDTOU64(w);
for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i]; for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i];
} }
......
#define CRYPTO_VERSION "1.2.5"
#define CRYPTO_KEYBYTES 16 #define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0 #define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16 #define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16 #define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1 #define CRYPTO_NOOVERLAP 1
#define ASCON_RATE 16 #define ASCON_AEAD_RATE 16
...@@ -3,15 +3,14 @@ ...@@ -3,15 +3,14 @@
#include <stdint.h> #include <stdint.h>
#include "config.h"
#include "word.h" #include "word.h"
typedef struct { typedef struct {
word_t x0, x1, x2, x3, x4; word_t x0, x1, x2, x3, x4;
} state_t; } state_t;
void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k); void ascon_aeadinit(state_t* s, const uint8_t* npub, const uint8_t* k);
void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen); void ascon_adata(state_t* s, const uint8_t* ad, uint64_t adlen);
void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen);
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen);
void ascon_final(state_t* s, const uint8_t* k); void ascon_final(state_t* s, const uint8_t* k);
......
#ifndef CONFIG_H_ #ifndef CONFIG_H_
#define CONFIG_H_ #define CONFIG_H_
/* inline the Ascon mode */ /* inline the ascon mode */
#ifndef ASCON_INLINE_MODE #ifndef ASCON_INLINE_MODE
#define ASCON_INLINE_MODE 1 #define ASCON_INLINE_MODE 0
#endif #endif
/* inline the Ascon permutations */ /* inline all permutations */
#ifndef ASCON_INLINE_PERM #ifndef ASCON_INLINE_PERM
#define ASCON_INLINE_PERM 0 #define ASCON_INLINE_PERM 1
#endif #endif
/* single function for all permutations */ /* unroll permutation loops */
#ifndef ASCON_SINGLE_PERM
#define ASCON_SINGLE_PERM 0
#endif
/* unroll the permutation loops */
#ifndef ASCON_UNROLL_LOOPS #ifndef ASCON_UNROLL_LOOPS
#define ASCON_UNROLL_LOOPS 1 #define ASCON_UNROLL_LOOPS 0
#endif
/* make sure __forceinline is supported */
#ifndef __forceinline
#define __forceinline inline __attribute__((always_inline))
#endif #endif
#endif /* CONFIG_H_ */ #endif /* CONFIG_H_ */
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
/* macros for big endian machines */ /* macros for big endian machines */
#ifndef NDEBUG #ifdef PRAGMA_ENDIAN
#pragma message("Using macros for big endian machines") #pragma message("Using macros for big endian machines")
#endif #endif
#define U64BIG(x) (x) #define U64BIG(x) (x)
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
/* macros for little endian machines */ /* macros for little endian machines */
#ifndef NDEBUG #ifdef PRAGMA_ENDIAN
#pragma message("Using macros for little endian machines") #pragma message("Using macros for little endian machines")
#endif #endif
#define U64BIG(x) \ #define U64BIG(x) \
......
...@@ -3,7 +3,9 @@ ...@@ -3,7 +3,9 @@
#include <stdint.h> #include <stdint.h>
__forceinline uint32_t deinterleave_uint32(uint32_t x) { #include "forceinline.h"
forceinline uint32_t deinterleave_uint32(uint32_t x) {
uint32_t t; uint32_t t;
t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1); t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1);
t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2); t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2);
...@@ -12,7 +14,7 @@ __forceinline uint32_t deinterleave_uint32(uint32_t x) { ...@@ -12,7 +14,7 @@ __forceinline uint32_t deinterleave_uint32(uint32_t x) {
return x; return x;
} }
__forceinline uint32_t interleave_uint32(uint32_t x) { forceinline uint32_t interleave_uint32(uint32_t x) {
uint32_t t; uint32_t t;
t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8); t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8);
t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4); t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4);
...@@ -22,7 +24,7 @@ __forceinline uint32_t interleave_uint32(uint32_t x) { ...@@ -22,7 +24,7 @@ __forceinline uint32_t interleave_uint32(uint32_t x) {
} }
/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ /* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
__forceinline uint64_t deinterleave32(uint64_t in) { forceinline uint64_t deinterleave32(uint64_t in) {
uint32_t hi = in >> 32; uint32_t hi = in >> 32;
uint32_t lo = in; uint32_t lo = in;
uint32_t r0, r1; uint32_t r0, r1;
...@@ -34,7 +36,7 @@ __forceinline uint64_t deinterleave32(uint64_t in) { ...@@ -34,7 +36,7 @@ __forceinline uint64_t deinterleave32(uint64_t in) {
} }
/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ /* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
__forceinline uint64_t interleave32(uint64_t in) { forceinline uint64_t interleave32(uint64_t in) {
uint32_t r0 = in; uint32_t r0 = in;
uint32_t r1 = in >> 32; uint32_t r1 = in >> 32;
uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16);
......
#include "permutations.h" #include "permutations.h"
#include "round.h" #if !ASCON_UNROLL_LOOPS
#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM
const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9}, const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9},
{0x6, 0xc}, {0x3, 0xc}, {0x6, 0x9}, {0x3, 0x9}, {0x6, 0xc}, {0x3, 0xc}, {0x6, 0x9}, {0x3, 0x9},
...@@ -10,35 +8,16 @@ const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9}, ...@@ -10,35 +8,16 @@ const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9},
#endif #endif
#if ASCON_INLINE_PERM #if !ASCON_INLINE_PERM && ASCON_UNROLL_LOOPS
#elif ASCON_SINGLE_PERM
void P(state_t* s, uint8_t rounds) {
printstate(" permutation input", s);
for (int i = START(rounds); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
}
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ void P12(state_t* s) { P12ROUNDS(s); }
void P8(state_t* s) { P8ROUNDS(s); }
void P6(state_t* s) { P6ROUNDS(s); }
void P12(state_t* s) {
printstate(" permutation input", s);
P12ROUNDS(s);
}
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16
void P8(state_t* s) {
printstate(" permutation input", s);
P8ROUNDS(s);
}
#endif #endif
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 #if !ASCON_INLINE_PERM && !ASCON_UNROLL_LOOPS
void P6(state_t* s) {
printstate(" permutation input", s); void P(state_t* s, int nr) { PROUNDS(s, nr); }
P6ROUNDS(s);
}
#endif
#endif #endif
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
#include "api.h" #include "api.h"
#include "ascon.h" #include "ascon.h"
#include "config.h"
#include "printstate.h" #include "printstate.h"
#include "round.h" #include "round.h"
...@@ -14,154 +15,124 @@ ...@@ -14,154 +15,124 @@
#define ASCON_128_RATE 8 #define ASCON_128_RATE 8
#define ASCON_128A_RATE 16 #define ASCON_128A_RATE 16
#define ASCON_HASH_RATE 8
#define ASCON_128_PA_ROUNDS 12 #define ASCON_128_PA_ROUNDS 12
#define ASCON_128_PB_ROUNDS 6 #define ASCON_128_PB_ROUNDS 6
#define ASCON_128A_PA_ROUNDS 12
#define ASCON_128A_PB_ROUNDS 8 #define ASCON_128A_PB_ROUNDS 8
#define ASCON_HASH_BYTES 32 #define ASCON_HASH_PA_ROUNDS 12
#define ASCON_HASH_PB_ROUNDS 12
#define ASCON_128_IV WORD_T(0x8021000008220000) #define ASCON_HASHA_PA_ROUNDS 12
#define ASCON_128A_IV WORD_T(0x8822000000200000) #define ASCON_HASHA_PB_ROUNDS 8
#define ASCON_80PQ_IV WORD_T(0xc021000008220000)
#define ASCON_HASH_IV WORD_T(0x0020000008020010)
#define ASCON_XOF_IV WORD_T(0x0020000008020000)
#define ASCON_HASH_IV0 WORD_T(0xf9afb5c6a540dbc7)
#define ASCON_HASH_IV1 WORD_T(0xbd2493011445a340)
#define ASCON_HASH_IV2 WORD_T(0xcb9ba8b5604d4fc8)
#define ASCON_HASH_IV3 WORD_T(0x12a4eede94514c98)
#define ASCON_HASH_IV4 WORD_T(0x4bca84c06339f398)
#define ASCON_XOF_IV0 WORD_T(0xc75782817e351ae6)
#define ASCON_XOF_IV1 WORD_T(0x70045f441d238220)
#define ASCON_XOF_IV2 WORD_T(0x5dd5ab52a13e3f04)
#define ASCON_XOF_IV3 WORD_T(0x3e378142c30c1db2)
#define ASCON_XOF_IV4 WORD_T(0x3735189db624d656)
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16
#define IV ASCON_128_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 6
#define PB P6
#endif
#if ASCON_RATE == 16 #define ASCON_HASH_BYTES 32
#define IV ASCON_128A_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 8
#define PB P8
#endif
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20 #define ASCON_128_IV WORD_T(0x8021000008220000ull)
#define IV ASCON_80PQ_IV #define ASCON_128A_IV WORD_T(0x8822000000200000ull)
#define PA_ROUNDS 12 #define ASCON_80PQ_IV WORD_T(0xc021000008220000ull)
#define PB_ROUNDS 6 #define ASCON_HASH_IV WORD_T(0x0020000008020010ull)
#define PB P6 #define ASCON_XOF_IV WORD_T(0x0020000008020000ull)
#endif
#define ASCON_HASH_IV0 WORD_T(0xf9afb5c6a540dbc7ull)
#define ASCON_HASH_IV1 WORD_T(0xbd2493011445a340ull)
#define ASCON_HASH_IV2 WORD_T(0xcb9ba8b5604d4fc8ull)
#define ASCON_HASH_IV3 WORD_T(0x12a4eede94514c98ull)
#define ASCON_HASH_IV4 WORD_T(0x4bca84c06339f398ull)
#define ASCON_HASHA_IV0 WORD_T(0x0108e46d1b16eb02ull)
#define ASCON_HASHA_IV1 WORD_T(0x5b9b8efdd29083f3ull)
#define ASCON_HASHA_IV2 WORD_T(0x7ad665622891ae4aull)
#define ASCON_HASHA_IV3 WORD_T(0x9dc27156ee3bfc7full)
#define ASCON_HASHA_IV4 WORD_T(0xc61d5fa916801633ull)
#define ASCON_XOF_IV0 WORD_T(0xc75782817e351ae6ull)
#define ASCON_XOF_IV1 WORD_T(0x70045f441d238220ull)
#define ASCON_XOF_IV2 WORD_T(0x5dd5ab52a13e3f04ull)
#define ASCON_XOF_IV3 WORD_T(0x3e378142c30c1db2ull)
#define ASCON_XOF_IV4 WORD_T(0x3735189db624d656ull)
#define ASCON_XOFA_IV0 WORD_T(0x0846d7a5a4b87d44ull)
#define ASCON_XOFA_IV1 WORD_T(0xaa6f1005b3a2dbf4ull)
#define ASCON_XOFA_IV2 WORD_T(0xdc451146f713e811ull)
#define ASCON_XOFA_IV3 WORD_T(0x468cb2532839e30dull)
#define ASCON_XOFA_IV4 WORD_T(0xeb2d429709e96977ull)
#define START(n) (12 - n) #define START(n) (12 - n)
#define RC(e, o) WORD_T((uint64_t)o << 32 | e)
#if ASCON_UNROLL_LOOPS
forceinline void P12ROUNDS(state_t* s) {
__forceinline void P12ROUNDS(state_t* s) { ROUND(s, RC(0xc, 0xc));
ROUND(s, 0xc, 0xc); ROUND(s, RC(0x9, 0xc));
ROUND(s, 0x9, 0xc); ROUND(s, RC(0xc, 0x9));
ROUND(s, 0xc, 0x9); ROUND(s, RC(0x9, 0x9));
ROUND(s, 0x9, 0x9); ROUND(s, RC(0x6, 0xc));
ROUND(s, 0x6, 0xc); ROUND(s, RC(0x3, 0xc));
ROUND(s, 0x3, 0xc); ROUND(s, RC(0x6, 0x9));
ROUND(s, 0x6, 0x9); ROUND(s, RC(0x3, 0x9));
ROUND(s, 0x3, 0x9); ROUND(s, RC(0xc, 0x6));
ROUND(s, 0xc, 0x6); ROUND(s, RC(0x9, 0x6));
ROUND(s, 0x9, 0x6); ROUND(s, RC(0xc, 0x3));
ROUND(s, 0xc, 0x3); ROUND(s, RC(0x9, 0x3));
ROUND(s, 0x9, 0x3);
} }
__forceinline void P8ROUNDS(state_t* s) { forceinline void P8ROUNDS(state_t* s) {
ROUND(s, 0x6, 0xc); ROUND(s, RC(0x6, 0xc));
ROUND(s, 0x3, 0xc); ROUND(s, RC(0x3, 0xc));
ROUND(s, 0x6, 0x9); ROUND(s, RC(0x6, 0x9));
ROUND(s, 0x3, 0x9); ROUND(s, RC(0x3, 0x9));
ROUND(s, 0xc, 0x6); ROUND(s, RC(0xc, 0x6));
ROUND(s, 0x9, 0x6); ROUND(s, RC(0x9, 0x6));
ROUND(s, 0xc, 0x3); ROUND(s, RC(0xc, 0x3));
ROUND(s, 0x9, 0x3); ROUND(s, RC(0x9, 0x3));
} }
__forceinline void P6ROUNDS(state_t* s) { forceinline void P6ROUNDS(state_t* s) {
ROUND(s, 0x6, 0x9); ROUND(s, RC(0x6, 0x9));
ROUND(s, 0x3, 0x9); ROUND(s, RC(0x3, 0x9));
ROUND(s, 0xc, 0x6); ROUND(s, RC(0xc, 0x6));
ROUND(s, 0x9, 0x6); ROUND(s, RC(0x9, 0x6));
ROUND(s, 0xc, 0x3); ROUND(s, RC(0xc, 0x3));
ROUND(s, 0x9, 0x3); ROUND(s, RC(0x9, 0x3));
} }
#else /* !ASCON_UNROLL_LOOPS */
extern const uint8_t constants[][2]; extern const uint8_t constants[][2];
__forceinline void P12ROUNDS(state_t* s) { forceinline void PROUNDS(state_t* s, int nr) {
for (int i = START(12); i < 12; i++) for (int i = START(nr); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]); ROUND(s, RC(constants[i][0], constants[i][1]));
} }
__forceinline void P8ROUNDS(state_t* s) { #if ASCON_INLINE_PERM && ASCON_UNROLL_LOOPS
for (int i = START(8); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
}
__forceinline void P6ROUNDS(state_t* s) { forceinline void P(state_t* s, int nr) {
for (int i = START(6); i < 12; i++) if (nr == 12) P12ROUNDS(s);
ROUND(s, constants[i][0], constants[i][1]); if (nr == 8) P8ROUNDS(s);
if (nr == 6) P6ROUNDS(s);
} }
#endif #elif !ASCON_INLINE_PERM && ASCON_UNROLL_LOOPS
#if ASCON_INLINE_PERM
__forceinline void P12(state_t* s) {
printstate(" permutation input", s);
P12ROUNDS(s);
}
__forceinline void P8(state_t* s) { void P12(state_t* s);
printstate(" permutation input", s); void P8(state_t* s);
P8ROUNDS(s); void P6(state_t* s);
}
__forceinline void P6(state_t* s) {
printstate(" permutation input", s);
P6ROUNDS(s);
}
__forceinline void P(state_t* s, int i) { forceinline void P(state_t* s, int nr) {
if (i == 12) P12(s); if (nr == 12) P12(s);
if (i == 8) P8(s); if (nr == 8) P8(s);
if (i == 6) P6(s); if (nr == 6) P6(s);
} }
#elif ASCON_SINGLE_PERM #elif ASCON_INLINE_PERM && !ASCON_UNROLL_LOOPS
#define P12(s) P(s, 12) forceinline void P(state_t* s, int nr) { PROUNDS(s, nr); }
#define P8(s) P(s, 8)
#define P6(s) P(s, 6)
void P(state_t* s, uint8_t rounds); #else /* !ASCON_INLINE_PERM && !ASCON_UNROLL_LOOPS */
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ void P(state_t* s, int nr);
void P12(state_t* s);
void P8(state_t* s);
void P6(state_t* s);
__forceinline void P(state_t* s, int i) {
if (i == 12) P12(s);
if (i == 8) P8(s);
if (i == 6) P6(s);
}
#endif #endif
......
#ifndef PRINTSTATE_H_ #ifndef PRINTSTATE_H_
#define PRINTSTATE_H_ #define PRINTSTATE_H_
#ifdef NDEBUG #ifdef ASCON_PRINTSTATE
#define printword(text, w) #include "ascon.h"
#define printstate(text, s) #include "word.h"
#else void printword(const char* text, const word_t x);
void printstate(const char* text, const state_t* s);
#include <inttypes.h> #else
#include <stdio.h>
#include "ascon.h" #define printword(text, w) \
#include "word.h" do { \
} while (0)
__forceinline void printword(const char* text, const word_t x) { #define printstate(text, s) \
printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); do { \
} } while (0)
__forceinline void printstate(const char* text, const state_t* s) {
printf("%s:\n", text);
printword(" x0", s->x0);
printword(" x1", s->x1);
printword(" x2", s->x2);
printword(" x3", s->x3);
printword(" x4", s->x4);
}
#endif #endif
......
...@@ -4,13 +4,13 @@ ...@@ -4,13 +4,13 @@
#include "ascon.h" #include "ascon.h"
#include "printstate.h" #include "printstate.h"
__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) {
*K0 = WORD_T(0); *K0 = WORD_T(0);
*K1 = WORD_T(0); *K1 = WORD_T(0);
*K2 = WORD_T(0); *K2 = WORD_T(0);
} }
__forceinline void PINIT(state_t* s) { forceinline void PINIT(state_t* s) {
s->x0 = WORD_T(0); s->x0 = WORD_T(0);
s->x1 = WORD_T(0); s->x1 = WORD_T(0);
s->x2 = WORD_T(0); s->x2 = WORD_T(0);
...@@ -18,34 +18,34 @@ __forceinline void PINIT(state_t* s) { ...@@ -18,34 +18,34 @@ __forceinline void PINIT(state_t* s) {
s->x4 = WORD_T(0); s->x4 = WORD_T(0);
} }
__forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) { forceinline void ROUND(state_t* s, word_t C) {
word_t tmp, C = {.o = C_o, .e = C_e}; word_t xtemp;
/* round constant */ /* round constant */
s->x2 = XOR(s->x2, C); s->x2 = XOR(s->x2, C);
/* s-box layer */ /* s-box layer */
s->x0 = XOR(s->x0, s->x4); s->x0 = XOR(s->x0, s->x4);
s->x4 = XOR(s->x4, s->x3); s->x4 = XOR(s->x4, s->x3);
s->x2 = XOR(s->x2, s->x1); s->x2 = XOR(s->x2, s->x1);
tmp = AND(s->x0, NOT(s->x4)); xtemp = AND(s->x0, NOT(s->x4));
s->x0 = XOR(s->x0, AND(s->x2, NOT(s->x1))); s->x0 = XOR(s->x0, AND(s->x2, NOT(s->x1)));
s->x2 = XOR(s->x2, AND(s->x4, NOT(s->x3))); s->x2 = XOR(s->x2, AND(s->x4, NOT(s->x3)));
s->x4 = XOR(s->x4, AND(s->x1, NOT(s->x0))); s->x4 = XOR(s->x4, AND(s->x1, NOT(s->x0)));
s->x1 = XOR(s->x1, AND(s->x3, NOT(s->x2))); s->x1 = XOR(s->x1, AND(s->x3, NOT(s->x2)));
s->x3 = XOR(s->x3, tmp); s->x3 = XOR(s->x3, xtemp);
s->x1 = XOR(s->x1, s->x0); s->x1 = XOR(s->x1, s->x0);
s->x3 = XOR(s->x3, s->x2); s->x3 = XOR(s->x3, s->x2);
s->x0 = XOR(s->x0, s->x4); s->x0 = XOR(s->x0, s->x4);
/* linear layer */ /* linear layer */
tmp = XOR(s->x0, ROR64(s->x0, 28 - 19)); xtemp = XOR(s->x0, ROR(s->x0, 28 - 19));
s->x0 = XOR(s->x0, ROR64(tmp, 19)); s->x0 = XOR(s->x0, ROR(xtemp, 19));
tmp = XOR(s->x1, ROR64(s->x1, 61 - 39)); xtemp = XOR(s->x1, ROR(s->x1, 61 - 39));
s->x1 = XOR(s->x1, ROR64(tmp, 39)); s->x1 = XOR(s->x1, ROR(xtemp, 39));
tmp = XOR(s->x2, ROR64(s->x2, 6 - 1)); xtemp = XOR(s->x2, ROR(s->x2, 6 - 1));
s->x2 = XOR(s->x2, ROR64(tmp, 1)); s->x2 = XOR(s->x2, ROR(xtemp, 1));
tmp = XOR(s->x3, ROR64(s->x3, 17 - 10)); xtemp = XOR(s->x3, ROR(s->x3, 17 - 10));
s->x3 = XOR(s->x3, ROR64(tmp, 10)); s->x3 = XOR(s->x3, ROR(xtemp, 10));
tmp = XOR(s->x4, ROR64(s->x4, 41 - 7)); xtemp = XOR(s->x4, ROR(s->x4, 41 - 7));
s->x4 = XOR(s->x4, ROR64(tmp, 7)); s->x4 = XOR(s->x4, ROR(xtemp, 7));
s->x2 = NOT(s->x2); s->x2 = NOT(s->x2);
printstate(" round output", s); printstate(" round output", s);
} }
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
#include <stdint.h> #include <stdint.h>
#include "endian.h" #include "endian.h"
#include "forceinline.h"
#include "interleave.h" #include "interleave.h"
typedef struct { typedef struct {
...@@ -11,102 +12,92 @@ typedef struct { ...@@ -11,102 +12,92 @@ typedef struct {
uint32_t o; uint32_t o;
} word_t; } word_t;
__forceinline uint32_t ROR32(uint32_t x, int n) { forceinline uint32_t ROR32(uint32_t x, int n) {
return (n == 0) ? x : x >> n | x << (32 - n); return (n == 0) ? x : x >> n | x << (32 - n);
} }
__forceinline word_t ROR64(word_t x, int n) { forceinline word_t ROR(word_t x, int n) {
word_t r; word_t r;
r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2);
r.o = (n % 2) ? ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); r.o = (n % 2) ? ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2);
return r; return r;
} }
__forceinline word_t WORD_T(uint64_t x) { forceinline word_t WORD_T(uint64_t x) { return (word_t){.o = x >> 32, .e = x}; }
return (word_t){.o = x >> 32, .e = x};
}
__forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; } forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; }
__forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(deinterleave32(x)); } forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(deinterleave32(x)); }
__forceinline uint64_t WORDTOU64(word_t w) { return interleave32(UINT64_T(w)); } forceinline uint64_t WORDTOU64(word_t w) { return interleave32(UINT64_T(w)); }
__forceinline word_t NOT(word_t a) { forceinline word_t NOT(word_t a) {
a.e = ~a.e; a.e = ~a.e;
a.o = ~a.o; a.o = ~a.o;
return a; return a;
} }
__forceinline word_t XOR(word_t a, word_t b) { forceinline word_t XOR(word_t a, word_t b) {
a.e ^= b.e; a.e ^= b.e;
a.o ^= b.o; a.o ^= b.o;
return a; return a;
} }
__forceinline word_t AND(word_t a, word_t b) { forceinline word_t AND(word_t a, word_t b) {
a.e &= b.e; a.e &= b.e;
a.o &= b.o; a.o &= b.o;
return a; return a;
} }
__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) {
word_t r; word_t r;
r.o = lo2hi.o << 16 | hi2lo.o >> 16;
r.e = lo2hi.e << 16 | hi2lo.e >> 16; r.e = lo2hi.e << 16 | hi2lo.e >> 16;
r.o = lo2hi.o << 16 | hi2lo.o >> 16;
return r; return r;
} }
__forceinline uint8_t NOTZERO(word_t a, word_t b) { forceinline int NOTZERO(word_t a, word_t b) {
uint32_t result = a.e | a.o | b.e | b.o; uint32_t result = a.e | a.o | b.e | b.o;
result |= result >> 16; result |= result >> 16;
result |= result >> 8; result |= result >> 8;
return (uint8_t)result; return ((((int)(result & 0xff) - 1) >> 8) & 1) - 1;
} }
__forceinline word_t PAD(int i) { forceinline word_t PAD(int i) {
return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32); return WORD_T((uint64_t)(0x8ul << (28 - 4 * i)) << 32);
} }
__forceinline word_t CLEAR(word_t w, int n) { forceinline word_t CLEAR(word_t w, int n) {
/* undefined for n == 0 */ /* undefined for n == 0 */
uint32_t mask = 0x0fffffff >> (n * 4 - 4); uint32_t mask = 0x0fffffff >> (n * 4 - 4);
return AND(w, WORD_T((uint64_t)mask << 32 | mask)); w.e &= mask;
w.o &= mask;
return w;
} }
__forceinline uint64_t MASK(int n) { forceinline uint64_t MASK(int n) {
/* undefined for n == 0 */ /* undefined for n == 0 */
return ~0ull >> (64 - 8 * n); return ~0ull >> (64 - 8 * n);
} }
__forceinline word_t LOAD64(const uint8_t* bytes) { forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = *(uint64_t*)bytes;
return U64TOWORD(U64BIG(x));
}
__forceinline void STORE64(uint8_t* bytes, word_t w) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes = U64BIG(x);
}
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = *(uint64_t*)bytes & MASK(n); uint64_t x = *(uint64_t*)bytes & MASK(n);
return U64TOWORD(U64BIG(x)); return U64TOWORD(U64BIG(x));
} }
__forceinline void STORE(uint8_t* bytes, word_t w, int n) { forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w); uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes &= ~MASK(n); *(uint64_t*)bytes &= ~MASK(n);
*(uint64_t*)bytes |= U64BIG(x); *(uint64_t*)bytes |= U64BIG(x);
} }
__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) { forceinline word_t LOADBYTES(const uint8_t* bytes, int n) {
uint64_t x = 0; uint64_t x = 0;
for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i]; for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i];
return U64TOWORD(x); return U64TOWORD(x);
} }
__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) { forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w); uint64_t x = WORDTOU64(w);
for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i]; for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i];
} }
......
...@@ -3,49 +3,43 @@ ...@@ -3,49 +3,43 @@
#include "permutations.h" #include "permutations.h"
#include "printstate.h" #include "printstate.h"
void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, forceinline void ascon_loadkey(word_t* K0, word_t* K1, word_t* K2,
uint8_t mode); const uint8_t* k) {
KINIT(K0, K1, K2);
void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen, if (CRYPTO_KEYBYTES == 16) {
const uint8_t* ad, uint64_t adlen, const uint8_t* npub, *K1 = XOR(*K1, LOAD(k, 8));
const uint8_t* k, uint8_t mode) { *K2 = XOR(*K2, LOAD(k + 8, 8));
word_t K0, K1, K2; }
/* load key */
if (CRYPTO_KEYBYTES == 20) { if (CRYPTO_KEYBYTES == 20) {
K0 = KEYROT(WORD_T(0), LOAD(k, 4)); *K0 = XOR(*K0, KEYROT(WORD_T(0), LOADBYTES(k, 4)));
k += 4; *K1 = XOR(*K1, LOADBYTES(k + 4, 8));
*K2 = XOR(*K2, LOADBYTES(k + 12, 8));
} }
K1 = LOAD64(k); }
K2 = LOAD64(k + 8);
/* initialization */ forceinline void ascon_aeadinit(state_t* s, const uint8_t* npub, word_t K0,
s->x0 = IV; word_t K1, word_t K2) {
if (CRYPTO_KEYBYTES == 16 && ASCON_AEAD_RATE == 8) s->x0 = ASCON_128_IV;
if (CRYPTO_KEYBYTES == 16 && ASCON_AEAD_RATE == 16) s->x0 = ASCON_128A_IV;
if (CRYPTO_KEYBYTES == 20) s->x0 = ASCON_80PQ_IV;
if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, K0); if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, K0);
s->x1 = K1; s->x1 = K1;
s->x2 = K2; s->x2 = K2;
s->x3 = LOAD64(npub); s->x3 = LOAD(npub, 8);
s->x4 = LOAD64(npub + 8); s->x4 = LOAD(npub + 8, 8);
P12(s); P(s, 12);
if (CRYPTO_KEYBYTES == 20) s->x2 = XOR(s->x2, K0); if (CRYPTO_KEYBYTES == 20) s->x2 = XOR(s->x2, K0);
s->x3 = XOR(s->x3, K1); s->x3 = XOR(s->x3, K1);
s->x4 = XOR(s->x4, K2); s->x4 = XOR(s->x4, K2);
printstate("initialization", s); printstate("initialization", s);
/* process associated data */ }
if (adlen) {
process_data(s, (void*)0, ad, adlen, ASCON_ABSORB); forceinline void ascon_final(state_t* s, word_t K0, word_t K1, word_t K2) {
PB(s); if (CRYPTO_KEYBYTES == 16 && ASCON_AEAD_RATE == 8) {
}
s->x4 = XOR(s->x4, WORD_T(1));
printstate("process associated data", s);
/* process plaintext/ciphertext */
process_data(s, out, in, tlen, mode);
if (mode == ASCON_ENCRYPT) printstate("process plaintext", s);
if (mode == ASCON_DECRYPT) printstate("process ciphertext", s);
/* finalization */
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) {
s->x1 = XOR(s->x1, K1); s->x1 = XOR(s->x1, K1);
s->x2 = XOR(s->x2, K2); s->x2 = XOR(s->x2, K2);
} }
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) { if (CRYPTO_KEYBYTES == 16 && ASCON_AEAD_RATE == 16) {
s->x2 = XOR(s->x2, K1); s->x2 = XOR(s->x2, K1);
s->x3 = XOR(s->x3, K2); s->x3 = XOR(s->x3, K2);
} }
...@@ -54,8 +48,32 @@ void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen, ...@@ -54,8 +48,32 @@ void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen,
s->x2 = XOR(s->x2, KEYROT(K1, K2)); s->x2 = XOR(s->x2, KEYROT(K1, K2));
s->x3 = XOR(s->x3, KEYROT(K2, WORD_T(0))); s->x3 = XOR(s->x3, KEYROT(K2, WORD_T(0)));
} }
P12(s); P(s, 12);
s->x3 = XOR(s->x3, K1); s->x3 = XOR(s->x3, K1);
s->x4 = XOR(s->x4, K2); s->x4 = XOR(s->x4, K2);
printstate("finalization", s); printstate("finalization", s);
} }
void ascon_aead(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen,
const uint8_t* ad, uint64_t adlen, const uint8_t* npub,
const uint8_t* k, uint8_t mode) {
const int nr = (ASCON_AEAD_RATE == 8) ? 6 : 8;
word_t K0, K1, K2;
ascon_loadkey(&K0, &K1, &K2, k);
/* initialize */
ascon_aeadinit(s, npub, K0, K1, K2);
/* process associated data */
if (adlen) {
ascon_update(s, (void*)0, ad, adlen, ASCON_ABSORB);
P(s, nr);
}
/* domain separation */
s->x4 = XOR(s->x4, WORD_T(1));
printstate("process associated data", s);
/* process plaintext/ciphertext */
ascon_update(s, out, in, tlen, mode);
if (mode == ASCON_ENCRYPT) printstate("process plaintext", s);
if (mode == ASCON_DECRYPT) printstate("process ciphertext", s);
/* finalize */
ascon_final(s, K0, K1, K2);
}
#define CRYPTO_VERSION "1.2.5"
#define CRYPTO_KEYBYTES 16 #define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0 #define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16 #define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16 #define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1 #define CRYPTO_NOOVERLAP 1
#define ASCON_RATE 16 #define ASCON_AEAD_RATE 16
...@@ -3,7 +3,6 @@ ...@@ -3,7 +3,6 @@
#include <stdint.h> #include <stdint.h>
#include "config.h"
#include "word.h" #include "word.h"
typedef struct { typedef struct {
...@@ -13,13 +12,14 @@ typedef struct { ...@@ -13,13 +12,14 @@ typedef struct {
#define ASCON_ABSORB 0x1 #define ASCON_ABSORB 0x1
#define ASCON_SQUEEZE 0x2 #define ASCON_SQUEEZE 0x2
#define ASCON_INSERT 0x4 #define ASCON_INSERT 0x4
#define ASCON_HASH 0x8
#define ASCON_ENCRYPT (ASCON_ABSORB | ASCON_SQUEEZE) #define ASCON_ENCRYPT (ASCON_ABSORB | ASCON_SQUEEZE)
#define ASCON_DECRYPT (ASCON_ABSORB | ASCON_SQUEEZE | ASCON_INSERT) #define ASCON_DECRYPT (ASCON_ABSORB | ASCON_SQUEEZE | ASCON_INSERT)
void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, void ascon_update(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len,
uint8_t mode); uint8_t mode);
void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen, void ascon_aead(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen,
const uint8_t* ad, uint64_t adlen, const uint8_t* npub, const uint8_t* ad, uint64_t adlen, const uint8_t* npub,
const uint8_t* k, uint8_t mode); const uint8_t* k, uint8_t mode);
......
#ifndef CONFIG_H_ #ifndef CONFIG_H_
#define CONFIG_H_ #define CONFIG_H_
/* inline the Ascon mode */ /* inline the ascon mode */
#ifndef ASCON_INLINE_MODE #ifndef ASCON_INLINE_MODE
#define ASCON_INLINE_MODE 1 #define ASCON_INLINE_MODE 0
#endif #endif
/* inline the Ascon permutations */ /* inline all permutations */
#ifndef ASCON_INLINE_PERM #ifndef ASCON_INLINE_PERM
#define ASCON_INLINE_PERM 0 #define ASCON_INLINE_PERM 0
#endif #endif
/* single function for all permutations */ /* unroll permutation loops */
#ifndef ASCON_SINGLE_PERM
#define ASCON_SINGLE_PERM 1
#endif
/* unroll the permutation loops */
#ifndef ASCON_UNROLL_LOOPS #ifndef ASCON_UNROLL_LOOPS
#define ASCON_UNROLL_LOOPS 1 #define ASCON_UNROLL_LOOPS 0
#endif
/* make sure __forceinline is supported */
#ifndef __forceinline
#define __forceinline inline __attribute__((always_inline))
#endif #endif
#endif /* CONFIG_H_ */ #endif /* CONFIG_H_ */
#include "api.h" #include "api.h"
#include "ascon.h" #include "ascon.h"
#include "crypto_aead.h"
#include "permutations.h" #include "permutations.h"
#include "printstate.h" #include "printstate.h"
void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen, void ascon_aead(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen,
const uint8_t* ad, uint64_t adlen, const uint8_t* npub, const uint8_t* ad, uint64_t adlen, const uint8_t* npub,
const uint8_t* k, uint8_t mode); const uint8_t* k, uint8_t mode);
int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen,
const uint8_t* c, uint64_t clen, const uint8_t* ad, unsigned char* nsec, const unsigned char* c,
uint64_t adlen, const uint8_t* npub, const uint8_t* k) { unsigned long long clen, const unsigned char* ad,
if (clen < CRYPTO_ABYTES) { unsigned long long adlen, const unsigned char* npub,
*mlen = 0; const unsigned char* k) {
return -1;
}
state_t s; state_t s;
(void)nsec; (void)nsec;
if (clen < CRYPTO_ABYTES) return -1;
/* set plaintext size */ /* set plaintext size */
*mlen = clen - CRYPTO_ABYTES; *mlen = clen - CRYPTO_ABYTES;
/* ascon decryption */ /* ascon decryption */
ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DECRYPT); ascon_aead(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DECRYPT);
/* verify tag (should be constant time, check compiler output) */ /* verify tag (should be constant time, check compiler output) */
s.x3 = XOR(s.x3, LOADBYTES(c + *mlen, 8)); s.x3 = XOR(s.x3, LOADBYTES(c + *mlen, 8));
s.x4 = XOR(s.x4, LOADBYTES(c + *mlen + 8, 8)); s.x4 = XOR(s.x4, LOADBYTES(c + *mlen + 8, 8));
if (NOTZERO(s.x3, s.x4)) { return NOTZERO(s.x3, s.x4);
*mlen = 0;
return -1;
}
return 0;
} }
#include "api.h" #include "api.h"
#include "ascon.h" #include "ascon.h"
#include "crypto_aead.h"
#include "permutations.h" #include "permutations.h"
#include "printstate.h" #include "printstate.h"
void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen, void ascon_aead(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen,
const uint8_t* ad, uint64_t adlen, const uint8_t* npub, const uint8_t* ad, uint64_t adlen, const uint8_t* npub,
const uint8_t* k, uint8_t mode); const uint8_t* k, uint8_t mode);
int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen,
uint64_t mlen, const uint8_t* ad, uint64_t adlen, const unsigned char* m, unsigned long long mlen,
const uint8_t* nsec, const uint8_t* npub, const unsigned char* ad, unsigned long long adlen,
const uint8_t* k) { const unsigned char* nsec, const unsigned char* npub,
const unsigned char* k) {
state_t s; state_t s;
(void)nsec; (void)nsec;
/* set ciphertext size */ /* set ciphertext size */
*clen = mlen + CRYPTO_ABYTES; *clen = mlen + CRYPTO_ABYTES;
/* ascon encryption */ /* ascon encryption */
ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENCRYPT); ascon_aead(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENCRYPT);
/* set tag */ /* set tag */
STOREBYTES(c + mlen, s.x3, 8); STOREBYTES(c + mlen, s.x3, 8);
STOREBYTES(c + mlen + 8, s.x4, 8); STOREBYTES(c + mlen + 8, s.x4, 8);
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
/* macros for big endian machines */ /* macros for big endian machines */
#ifndef NDEBUG #ifdef PRAGMA_ENDIAN
#pragma message("Using macros for big endian machines") #pragma message("Using macros for big endian machines")
#endif #endif
#define U64BIG(x) (x) #define U64BIG(x) (x)
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
/* macros for little endian machines */ /* macros for little endian machines */
#ifndef NDEBUG #ifdef PRAGMA_ENDIAN
#pragma message("Using macros for little endian machines") #pragma message("Using macros for little endian machines")
#endif #endif
#define U64BIG(x) \ #define U64BIG(x) \
......
...@@ -3,6 +3,8 @@ ...@@ -3,6 +3,8 @@
#include <stdint.h> #include <stdint.h>
#include "forceinline.h"
uint64_t deinterleave32(uint64_t in); uint64_t deinterleave32(uint64_t in);
uint64_t interleave32(uint64_t in); uint64_t interleave32(uint64_t in);
......
#include "permutations.h" #include "permutations.h"
#include "round.h" #if !ASCON_UNROLL_LOOPS
#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM
const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9}, const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9},
{0x6, 0xc}, {0x3, 0xc}, {0x6, 0x9}, {0x3, 0x9}, {0x6, 0xc}, {0x3, 0xc}, {0x6, 0x9}, {0x3, 0x9},
...@@ -10,35 +8,16 @@ const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9}, ...@@ -10,35 +8,16 @@ const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9},
#endif #endif
#if ASCON_INLINE_PERM #if !ASCON_INLINE_PERM && ASCON_UNROLL_LOOPS
#elif ASCON_SINGLE_PERM
void P(state_t* s, uint8_t rounds) {
printstate(" permutation input", s);
for (int i = START(rounds); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
}
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ void P12(state_t* s) { P12ROUNDS(s); }
void P8(state_t* s) { P8ROUNDS(s); }
void P6(state_t* s) { P6ROUNDS(s); }
void P12(state_t* s) {
printstate(" permutation input", s);
P12ROUNDS(s);
}
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16
void P8(state_t* s) {
printstate(" permutation input", s);
P8ROUNDS(s);
}
#endif #endif
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 #if !ASCON_INLINE_PERM && !ASCON_UNROLL_LOOPS
void P6(state_t* s) {
printstate(" permutation input", s); void P(state_t* s, int nr) { PROUNDS(s, nr); }
P6ROUNDS(s);
}
#endif
#endif #endif
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
#include "api.h" #include "api.h"
#include "ascon.h" #include "ascon.h"
#include "config.h"
#include "printstate.h" #include "printstate.h"
#include "round.h" #include "round.h"
...@@ -14,154 +15,124 @@ ...@@ -14,154 +15,124 @@
#define ASCON_128_RATE 8 #define ASCON_128_RATE 8
#define ASCON_128A_RATE 16 #define ASCON_128A_RATE 16
#define ASCON_HASH_RATE 8
#define ASCON_128_PA_ROUNDS 12 #define ASCON_128_PA_ROUNDS 12
#define ASCON_128_PB_ROUNDS 6 #define ASCON_128_PB_ROUNDS 6
#define ASCON_128A_PA_ROUNDS 12
#define ASCON_128A_PB_ROUNDS 8 #define ASCON_128A_PB_ROUNDS 8
#define ASCON_HASH_BYTES 32 #define ASCON_HASH_PA_ROUNDS 12
#define ASCON_HASH_PB_ROUNDS 12
#define ASCON_128_IV WORD_T(0x8021000008220000) #define ASCON_HASHA_PA_ROUNDS 12
#define ASCON_128A_IV WORD_T(0x8822000000200000) #define ASCON_HASHA_PB_ROUNDS 8
#define ASCON_80PQ_IV WORD_T(0xc021000008220000)
#define ASCON_HASH_IV WORD_T(0x0020000008020010)
#define ASCON_XOF_IV WORD_T(0x0020000008020000)
#define ASCON_HASH_IV0 WORD_T(0xf9afb5c6a540dbc7)
#define ASCON_HASH_IV1 WORD_T(0xbd2493011445a340)
#define ASCON_HASH_IV2 WORD_T(0xcb9ba8b5604d4fc8)
#define ASCON_HASH_IV3 WORD_T(0x12a4eede94514c98)
#define ASCON_HASH_IV4 WORD_T(0x4bca84c06339f398)
#define ASCON_XOF_IV0 WORD_T(0xc75782817e351ae6)
#define ASCON_XOF_IV1 WORD_T(0x70045f441d238220)
#define ASCON_XOF_IV2 WORD_T(0x5dd5ab52a13e3f04)
#define ASCON_XOF_IV3 WORD_T(0x3e378142c30c1db2)
#define ASCON_XOF_IV4 WORD_T(0x3735189db624d656)
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16
#define IV ASCON_128_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 6
#define PB P6
#endif
#if ASCON_RATE == 16 #define ASCON_HASH_BYTES 32
#define IV ASCON_128A_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 8
#define PB P8
#endif
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20 #define ASCON_128_IV WORD_T(0x8021000008220000ull)
#define IV ASCON_80PQ_IV #define ASCON_128A_IV WORD_T(0x8822000000200000ull)
#define PA_ROUNDS 12 #define ASCON_80PQ_IV WORD_T(0xc021000008220000ull)
#define PB_ROUNDS 6 #define ASCON_HASH_IV WORD_T(0x0020000008020010ull)
#define PB P6 #define ASCON_XOF_IV WORD_T(0x0020000008020000ull)
#endif
#define ASCON_HASH_IV0 WORD_T(0xf9afb5c6a540dbc7ull)
#define ASCON_HASH_IV1 WORD_T(0xbd2493011445a340ull)
#define ASCON_HASH_IV2 WORD_T(0xcb9ba8b5604d4fc8ull)
#define ASCON_HASH_IV3 WORD_T(0x12a4eede94514c98ull)
#define ASCON_HASH_IV4 WORD_T(0x4bca84c06339f398ull)
#define ASCON_HASHA_IV0 WORD_T(0x0108e46d1b16eb02ull)
#define ASCON_HASHA_IV1 WORD_T(0x5b9b8efdd29083f3ull)
#define ASCON_HASHA_IV2 WORD_T(0x7ad665622891ae4aull)
#define ASCON_HASHA_IV3 WORD_T(0x9dc27156ee3bfc7full)
#define ASCON_HASHA_IV4 WORD_T(0xc61d5fa916801633ull)
#define ASCON_XOF_IV0 WORD_T(0xc75782817e351ae6ull)
#define ASCON_XOF_IV1 WORD_T(0x70045f441d238220ull)
#define ASCON_XOF_IV2 WORD_T(0x5dd5ab52a13e3f04ull)
#define ASCON_XOF_IV3 WORD_T(0x3e378142c30c1db2ull)
#define ASCON_XOF_IV4 WORD_T(0x3735189db624d656ull)
#define ASCON_XOFA_IV0 WORD_T(0x0846d7a5a4b87d44ull)
#define ASCON_XOFA_IV1 WORD_T(0xaa6f1005b3a2dbf4ull)
#define ASCON_XOFA_IV2 WORD_T(0xdc451146f713e811ull)
#define ASCON_XOFA_IV3 WORD_T(0x468cb2532839e30dull)
#define ASCON_XOFA_IV4 WORD_T(0xeb2d429709e96977ull)
#define START(n) (12 - n) #define START(n) (12 - n)
#define RC(e, o) WORD_T((uint64_t)o << 32 | e)
#if ASCON_UNROLL_LOOPS
forceinline void P12ROUNDS(state_t* s) {
__forceinline void P12ROUNDS(state_t* s) { ROUND(s, RC(0xc, 0xc));
ROUND(s, 0xc, 0xc); ROUND(s, RC(0x9, 0xc));
ROUND(s, 0x9, 0xc); ROUND(s, RC(0xc, 0x9));
ROUND(s, 0xc, 0x9); ROUND(s, RC(0x9, 0x9));
ROUND(s, 0x9, 0x9); ROUND(s, RC(0x6, 0xc));
ROUND(s, 0x6, 0xc); ROUND(s, RC(0x3, 0xc));
ROUND(s, 0x3, 0xc); ROUND(s, RC(0x6, 0x9));
ROUND(s, 0x6, 0x9); ROUND(s, RC(0x3, 0x9));
ROUND(s, 0x3, 0x9); ROUND(s, RC(0xc, 0x6));
ROUND(s, 0xc, 0x6); ROUND(s, RC(0x9, 0x6));
ROUND(s, 0x9, 0x6); ROUND(s, RC(0xc, 0x3));
ROUND(s, 0xc, 0x3); ROUND(s, RC(0x9, 0x3));
ROUND(s, 0x9, 0x3);
} }
__forceinline void P8ROUNDS(state_t* s) { forceinline void P8ROUNDS(state_t* s) {
ROUND(s, 0x6, 0xc); ROUND(s, RC(0x6, 0xc));
ROUND(s, 0x3, 0xc); ROUND(s, RC(0x3, 0xc));
ROUND(s, 0x6, 0x9); ROUND(s, RC(0x6, 0x9));
ROUND(s, 0x3, 0x9); ROUND(s, RC(0x3, 0x9));
ROUND(s, 0xc, 0x6); ROUND(s, RC(0xc, 0x6));
ROUND(s, 0x9, 0x6); ROUND(s, RC(0x9, 0x6));
ROUND(s, 0xc, 0x3); ROUND(s, RC(0xc, 0x3));
ROUND(s, 0x9, 0x3); ROUND(s, RC(0x9, 0x3));
} }
__forceinline void P6ROUNDS(state_t* s) { forceinline void P6ROUNDS(state_t* s) {
ROUND(s, 0x6, 0x9); ROUND(s, RC(0x6, 0x9));
ROUND(s, 0x3, 0x9); ROUND(s, RC(0x3, 0x9));
ROUND(s, 0xc, 0x6); ROUND(s, RC(0xc, 0x6));
ROUND(s, 0x9, 0x6); ROUND(s, RC(0x9, 0x6));
ROUND(s, 0xc, 0x3); ROUND(s, RC(0xc, 0x3));
ROUND(s, 0x9, 0x3); ROUND(s, RC(0x9, 0x3));
} }
#else /* !ASCON_UNROLL_LOOPS */
extern const uint8_t constants[][2]; extern const uint8_t constants[][2];
__forceinline void P12ROUNDS(state_t* s) { forceinline void PROUNDS(state_t* s, int nr) {
for (int i = START(12); i < 12; i++) for (int i = START(nr); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]); ROUND(s, RC(constants[i][0], constants[i][1]));
} }
__forceinline void P8ROUNDS(state_t* s) { #if ASCON_INLINE_PERM && ASCON_UNROLL_LOOPS
for (int i = START(8); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
}
__forceinline void P6ROUNDS(state_t* s) { forceinline void P(state_t* s, int nr) {
for (int i = START(6); i < 12; i++) if (nr == 12) P12ROUNDS(s);
ROUND(s, constants[i][0], constants[i][1]); if (nr == 8) P8ROUNDS(s);
if (nr == 6) P6ROUNDS(s);
} }
#endif #elif !ASCON_INLINE_PERM && ASCON_UNROLL_LOOPS
#if ASCON_INLINE_PERM
__forceinline void P12(state_t* s) {
printstate(" permutation input", s);
P12ROUNDS(s);
}
__forceinline void P8(state_t* s) { void P12(state_t* s);
printstate(" permutation input", s); void P8(state_t* s);
P8ROUNDS(s); void P6(state_t* s);
}
__forceinline void P6(state_t* s) {
printstate(" permutation input", s);
P6ROUNDS(s);
}
__forceinline void P(state_t* s, int i) { forceinline void P(state_t* s, int nr) {
if (i == 12) P12(s); if (nr == 12) P12(s);
if (i == 8) P8(s); if (nr == 8) P8(s);
if (i == 6) P6(s); if (nr == 6) P6(s);
} }
#elif ASCON_SINGLE_PERM #elif ASCON_INLINE_PERM && !ASCON_UNROLL_LOOPS
#define P12(s) P(s, 12) forceinline void P(state_t* s, int nr) { PROUNDS(s, nr); }
#define P8(s) P(s, 8)
#define P6(s) P(s, 6)
void P(state_t* s, uint8_t rounds); #else /* !ASCON_INLINE_PERM && !ASCON_UNROLL_LOOPS */
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ void P(state_t* s, int nr);
void P12(state_t* s);
void P8(state_t* s);
void P6(state_t* s);
__forceinline void P(state_t* s, int i) {
if (i == 12) P12(s);
if (i == 8) P8(s);
if (i == 6) P6(s);
}
#endif #endif
......
#ifndef PRINTSTATE_H_ #ifndef PRINTSTATE_H_
#define PRINTSTATE_H_ #define PRINTSTATE_H_
#ifdef NDEBUG #ifdef ASCON_PRINTSTATE
#define printword(text, w) #include "ascon.h"
#define printstate(text, s) #include "word.h"
#else void printword(const char* text, const word_t x);
void printstate(const char* text, const state_t* s);
#include <inttypes.h> #else
#include <stdio.h>
#include "ascon.h" #define printword(text, w) \
#include "word.h" do { \
} while (0)
__forceinline void printword(const char* text, const word_t x) { #define printstate(text, s) \
printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); do { \
} } while (0)
__forceinline void printstate(const char* text, const state_t* s) {
printf("%s:\n", text);
printword(" x0", s->x0);
printword(" x1", s->x1);
printword(" x2", s->x2);
printword(" x3", s->x3);
printword(" x4", s->x4);
}
#endif #endif
......
...@@ -4,13 +4,13 @@ ...@@ -4,13 +4,13 @@
#include "ascon.h" #include "ascon.h"
#include "printstate.h" #include "printstate.h"
__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) {
*K0 = WORD_T(0); *K0 = WORD_T(0);
*K1 = WORD_T(0); *K1 = WORD_T(0);
*K2 = WORD_T(0); *K2 = WORD_T(0);
} }
__forceinline void PINIT(state_t* s) { forceinline void PINIT(state_t* s) {
s->x0 = WORD_T(0); s->x0 = WORD_T(0);
s->x1 = WORD_T(0); s->x1 = WORD_T(0);
s->x2 = WORD_T(0); s->x2 = WORD_T(0);
...@@ -18,34 +18,34 @@ __forceinline void PINIT(state_t* s) { ...@@ -18,34 +18,34 @@ __forceinline void PINIT(state_t* s) {
s->x4 = WORD_T(0); s->x4 = WORD_T(0);
} }
__forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) { forceinline void ROUND(state_t* s, word_t C) {
word_t tmp, C = {.o = C_o, .e = C_e}; word_t xtemp;
/* round constant */ /* round constant */
s->x2 = XOR(s->x2, C); s->x2 = XOR(s->x2, C);
/* s-box layer */ /* s-box layer */
s->x0 = XOR(s->x0, s->x4); s->x0 = XOR(s->x0, s->x4);
s->x4 = XOR(s->x4, s->x3); s->x4 = XOR(s->x4, s->x3);
s->x2 = XOR(s->x2, s->x1); s->x2 = XOR(s->x2, s->x1);
tmp = AND(s->x0, NOT(s->x4)); xtemp = AND(s->x0, NOT(s->x4));
s->x0 = XOR(s->x0, AND(s->x2, NOT(s->x1))); s->x0 = XOR(s->x0, AND(s->x2, NOT(s->x1)));
s->x2 = XOR(s->x2, AND(s->x4, NOT(s->x3))); s->x2 = XOR(s->x2, AND(s->x4, NOT(s->x3)));
s->x4 = XOR(s->x4, AND(s->x1, NOT(s->x0))); s->x4 = XOR(s->x4, AND(s->x1, NOT(s->x0)));
s->x1 = XOR(s->x1, AND(s->x3, NOT(s->x2))); s->x1 = XOR(s->x1, AND(s->x3, NOT(s->x2)));
s->x3 = XOR(s->x3, tmp); s->x3 = XOR(s->x3, xtemp);
s->x1 = XOR(s->x1, s->x0); s->x1 = XOR(s->x1, s->x0);
s->x3 = XOR(s->x3, s->x2); s->x3 = XOR(s->x3, s->x2);
s->x0 = XOR(s->x0, s->x4); s->x0 = XOR(s->x0, s->x4);
/* linear layer */ /* linear layer */
tmp = XOR(s->x0, ROR64(s->x0, 28 - 19)); xtemp = XOR(s->x0, ROR(s->x0, 28 - 19));
s->x0 = XOR(s->x0, ROR64(tmp, 19)); s->x0 = XOR(s->x0, ROR(xtemp, 19));
tmp = XOR(s->x1, ROR64(s->x1, 61 - 39)); xtemp = XOR(s->x1, ROR(s->x1, 61 - 39));
s->x1 = XOR(s->x1, ROR64(tmp, 39)); s->x1 = XOR(s->x1, ROR(xtemp, 39));
tmp = XOR(s->x2, ROR64(s->x2, 6 - 1)); xtemp = XOR(s->x2, ROR(s->x2, 6 - 1));
s->x2 = XOR(s->x2, ROR64(tmp, 1)); s->x2 = XOR(s->x2, ROR(xtemp, 1));
tmp = XOR(s->x3, ROR64(s->x3, 17 - 10)); xtemp = XOR(s->x3, ROR(s->x3, 17 - 10));
s->x3 = XOR(s->x3, ROR64(tmp, 10)); s->x3 = XOR(s->x3, ROR(xtemp, 10));
tmp = XOR(s->x4, ROR64(s->x4, 41 - 7)); xtemp = XOR(s->x4, ROR(s->x4, 41 - 7));
s->x4 = XOR(s->x4, ROR64(tmp, 7)); s->x4 = XOR(s->x4, ROR(xtemp, 7));
s->x2 = NOT(s->x2); s->x2 = NOT(s->x2);
printstate(" round output", s); printstate(" round output", s);
} }
......
...@@ -5,57 +5,40 @@ ...@@ -5,57 +5,40 @@
void ascon_update(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, void ascon_update(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len,
uint8_t mode) { uint8_t mode) {
const int nr = (ASCON_RATE == 8) ? 6 : 8; const int rate = 16;
const int nr = 8;
word_t tmp0, tmp1; word_t tmp0, tmp1;
/* full blocks */ int n = 0, n0 = 0, n1 = 0;
while (len >= ASCON_RATE) { while (len) {
tmp0 = LOAD(in, 8); /* determine block size */
tmp1 = LOAD(in + 8, 8); n0 = len < 8 ? len : 8;
n1 = len < 8 ? 0 : (len < 16 ? len - 8 : 8);
n = n0 + n1;
/* absorb data */
tmp0 = LOAD(in, n0);
s->x0 = XOR(s->x0, tmp0); s->x0 = XOR(s->x0, tmp0);
s->x1 = XOR(s->x1, tmp1); if (n1) tmp1 = LOAD(in + 8, n1);
if (n1) s->x1 = XOR(s->x1, tmp1);
/* extract data */
if (mode & ASCON_SQUEEZE) { if (mode & ASCON_SQUEEZE) {
STORE(out, s->x0, 8); STORE(out, s->x0, n0);
STORE(out + 8, s->x1, 8); if (n1) STORE(out + 8, s->x1, n1);
} }
/* insert data */
if (mode & ASCON_INSERT) { if (mode & ASCON_INSERT) {
s->x0 = tmp0; s->x0 = CLEAR(s->x0, n0);
s->x1 = tmp1; s->x0 = XOR(s->x0, tmp0);
if (n1) s->x1 = CLEAR(s->x1, n1);
if (n1) s->x1 = XOR(s->x1, tmp1);
} }
P(s, nr); /* compute permutation for full blocks */
in += ASCON_RATE; if (n == rate) P(s, nr);
out += ASCON_RATE; in += n;
len -= ASCON_RATE; out += n;
len -= n;
} }
/* final block */ if (n % rate < 8)
if (len) { s->x0 = XOR(s->x0, PAD(n0 % 8));
tmp1 = WORD_T(0);
if (len >= 8) tmp0 = LOAD(in, 8);
if (len > 8)
tmp1 = LOAD(in + 8, len - 8);
else
tmp0 = LOAD(in, len);
s->x0 = XOR(s->x0, tmp0);
s->x1 = XOR(s->x1, tmp1);
if (mode & ASCON_SQUEEZE) {
if (len >= 8) STORE(out, s->x0, 8);
if (len > 8)
STORE(out + 8, s->x1, len - 8);
else
STORE(out, s->x0, len);
}
if (mode & ASCON_INSERT) {
if (len >= 8) s->x0 = tmp0;
if (len > 8) {
s->x1 = CLEAR(s->x1, len - 8);
s->x1 = XOR(s->x1, tmp1);
} else {
s->x0 = CLEAR(s->x0, len);
s->x0 = XOR(s->x0, tmp0);
}
}
}
if (len < 8)
s->x0 = XOR(s->x0, PAD(len % 8));
else else
s->x1 = XOR(s->x1, PAD(len % 8)); s->x1 = XOR(s->x1, PAD(n1 % 8));
} }
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
#include <stdint.h> #include <stdint.h>
#include "endian.h" #include "endian.h"
#include "forceinline.h"
#include "interleave.h" #include "interleave.h"
typedef struct { typedef struct {
...@@ -11,102 +12,92 @@ typedef struct { ...@@ -11,102 +12,92 @@ typedef struct {
uint32_t o; uint32_t o;
} word_t; } word_t;
__forceinline uint32_t ROR32(uint32_t x, int n) { forceinline uint32_t ROR32(uint32_t x, int n) {
return (n == 0) ? x : x >> n | x << (32 - n); return (n == 0) ? x : x >> n | x << (32 - n);
} }
__forceinline word_t ROR64(word_t x, int n) { forceinline word_t ROR(word_t x, int n) {
word_t r; word_t r;
r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2);
r.o = (n % 2) ? ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); r.o = (n % 2) ? ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2);
return r; return r;
} }
__forceinline word_t WORD_T(uint64_t x) { forceinline word_t WORD_T(uint64_t x) { return (word_t){.o = x >> 32, .e = x}; }
return (word_t){.o = x >> 32, .e = x};
}
__forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; } forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; }
__forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(deinterleave32(x)); } forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(deinterleave32(x)); }
__forceinline uint64_t WORDTOU64(word_t w) { return interleave32(UINT64_T(w)); } forceinline uint64_t WORDTOU64(word_t w) { return interleave32(UINT64_T(w)); }
__forceinline word_t NOT(word_t a) { forceinline word_t NOT(word_t a) {
a.e = ~a.e; a.e = ~a.e;
a.o = ~a.o; a.o = ~a.o;
return a; return a;
} }
__forceinline word_t XOR(word_t a, word_t b) { forceinline word_t XOR(word_t a, word_t b) {
a.e ^= b.e; a.e ^= b.e;
a.o ^= b.o; a.o ^= b.o;
return a; return a;
} }
__forceinline word_t AND(word_t a, word_t b) { forceinline word_t AND(word_t a, word_t b) {
a.e &= b.e; a.e &= b.e;
a.o &= b.o; a.o &= b.o;
return a; return a;
} }
__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) {
word_t r; word_t r;
r.o = lo2hi.o << 16 | hi2lo.o >> 16;
r.e = lo2hi.e << 16 | hi2lo.e >> 16; r.e = lo2hi.e << 16 | hi2lo.e >> 16;
r.o = lo2hi.o << 16 | hi2lo.o >> 16;
return r; return r;
} }
__forceinline uint8_t NOTZERO(word_t a, word_t b) { forceinline int NOTZERO(word_t a, word_t b) {
uint32_t result = a.e | a.o | b.e | b.o; uint32_t result = a.e | a.o | b.e | b.o;
result |= result >> 16; result |= result >> 16;
result |= result >> 8; result |= result >> 8;
return (uint8_t)result; return ((((int)(result & 0xff) - 1) >> 8) & 1) - 1;
} }
__forceinline word_t PAD(int i) { forceinline word_t PAD(int i) {
return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32); return WORD_T((uint64_t)(0x8ul << (28 - 4 * i)) << 32);
} }
__forceinline word_t CLEAR(word_t w, int n) { forceinline word_t CLEAR(word_t w, int n) {
/* undefined for n == 0 */ /* undefined for n == 0 */
uint32_t mask = 0x0fffffff >> (n * 4 - 4); uint32_t mask = 0x0fffffff >> (n * 4 - 4);
return AND(w, WORD_T((uint64_t)mask << 32 | mask)); w.e &= mask;
w.o &= mask;
return w;
} }
__forceinline uint64_t MASK(int n) { forceinline uint64_t MASK(int n) {
/* undefined for n == 0 */ /* undefined for n == 0 */
return ~0ull >> (64 - 8 * n); return ~0ull >> (64 - 8 * n);
} }
__forceinline word_t LOAD64(const uint8_t* bytes) { forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = *(uint64_t*)bytes;
return U64TOWORD(U64BIG(x));
}
__forceinline void STORE64(uint8_t* bytes, word_t w) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes = U64BIG(x);
}
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = *(uint64_t*)bytes & MASK(n); uint64_t x = *(uint64_t*)bytes & MASK(n);
return U64TOWORD(U64BIG(x)); return U64TOWORD(U64BIG(x));
} }
__forceinline void STORE(uint8_t* bytes, word_t w, int n) { forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w); uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes &= ~MASK(n); *(uint64_t*)bytes &= ~MASK(n);
*(uint64_t*)bytes |= U64BIG(x); *(uint64_t*)bytes |= U64BIG(x);
} }
__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) { forceinline word_t LOADBYTES(const uint8_t* bytes, int n) {
uint64_t x = 0; uint64_t x = 0;
for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i]; for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i];
return U64TOWORD(x); return U64TOWORD(x);
} }
__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) { forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w); uint64_t x = WORDTOU64(w);
for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i]; for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i];
} }
......
#define CRYPTO_VERSION "1.2.5"
#define CRYPTO_KEYBYTES 16 #define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0 #define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16 #define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16 #define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1 #define CRYPTO_NOOVERLAP 1
#define ASCON_RATE 16 #define ASCON_AEAD_RATE 16
...@@ -3,15 +3,14 @@ ...@@ -3,15 +3,14 @@
#include <stdint.h> #include <stdint.h>
#include "config.h"
#include "word.h" #include "word.h"
typedef struct { typedef struct {
word_t x0, x1, x2, x3, x4; word_t x0, x1, x2, x3, x4;
} state_t; } state_t;
void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k); void ascon_aeadinit(state_t* s, const uint8_t* npub, const uint8_t* k);
void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen); void ascon_adata(state_t* s, const uint8_t* ad, uint64_t adlen);
void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen);
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen);
void ascon_final(state_t* s, const uint8_t* k); void ascon_final(state_t* s, const uint8_t* k);
......
#ifndef CONFIG_H_ #ifndef CONFIG_H_
#define CONFIG_H_ #define CONFIG_H_
/* inline the Ascon mode */ /* inline the ascon mode */
#ifndef ASCON_INLINE_MODE #ifndef ASCON_INLINE_MODE
#define ASCON_INLINE_MODE 1 #define ASCON_INLINE_MODE 1
#endif #endif
/* inline the Ascon permutations */ /* inline all permutations */
#ifndef ASCON_INLINE_PERM #ifndef ASCON_INLINE_PERM
#define ASCON_INLINE_PERM 0 #define ASCON_INLINE_PERM 0
#endif #endif
/* single function for all permutations */ /* unroll permutation loops */
#ifndef ASCON_SINGLE_PERM
#define ASCON_SINGLE_PERM 1
#endif
/* unroll the permutation loops */
#ifndef ASCON_UNROLL_LOOPS #ifndef ASCON_UNROLL_LOOPS
#define ASCON_UNROLL_LOOPS 0 #define ASCON_UNROLL_LOOPS 0
#endif #endif
/* make sure __forceinline is supported */
#ifndef __forceinline
#define __forceinline inline __attribute__((always_inline))
#endif
#endif /* CONFIG_H_ */ #endif /* CONFIG_H_ */
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
/* macros for big endian machines */ /* macros for big endian machines */
#ifndef NDEBUG #ifdef PRAGMA_ENDIAN
#pragma message("Using macros for big endian machines") #pragma message("Using macros for big endian machines")
#endif #endif
#define U64BIG(x) (x) #define U64BIG(x) (x)
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
/* macros for little endian machines */ /* macros for little endian machines */
#ifndef NDEBUG #ifdef PRAGMA_ENDIAN
#pragma message("Using macros for little endian machines") #pragma message("Using macros for little endian machines")
#endif #endif
#define U64BIG(x) \ #define U64BIG(x) \
......
...@@ -3,6 +3,8 @@ ...@@ -3,6 +3,8 @@
#include <stdint.h> #include <stdint.h>
#include "forceinline.h"
uint64_t interleave8(uint64_t x); uint64_t interleave8(uint64_t x);
#endif /* INTERLEAVE_H_ */ #endif /* INTERLEAVE_H_ */
#include "permutations.h" #include "permutations.h"
#include "round.h" #if !ASCON_UNROLL_LOOPS
#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM
const uint64_t constants[12] = { const uint64_t constants[12] = {
0x0101010100000000ull, 0x0101010000000001ull, 0x0101000100000100ull, 0x0101010100000000ull, 0x0101010000000001ull, 0x0101000100000100ull,
...@@ -12,34 +10,16 @@ const uint64_t constants[12] = { ...@@ -12,34 +10,16 @@ const uint64_t constants[12] = {
#endif #endif
#if ASCON_INLINE_PERM #if !ASCON_INLINE_PERM && ASCON_UNROLL_LOOPS
#elif ASCON_SINGLE_PERM
void P(state_t* s, uint8_t rounds) {
printstate(" permutation input", s);
for (int i = START(rounds); i < 12; ++i) ROUND(s, constants[i]);
}
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ void P12(state_t* s) { P12ROUNDS(s); }
void P8(state_t* s) { P8ROUNDS(s); }
void P6(state_t* s) { P6ROUNDS(s); }
void P12(state_t* s) {
printstate(" permutation input", s);
P12ROUNDS(s);
}
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16
void P8(state_t* s) {
printstate(" permutation input", s);
P8ROUNDS(s);
}
#endif #endif
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 #if !ASCON_INLINE_PERM && !ASCON_UNROLL_LOOPS
void P6(state_t* s) {
printstate(" permutation input", s); void P(state_t* s, int nr) { PROUNDS(s, nr); }
P6ROUNDS(s);
}
#endif
#endif #endif
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
#include "api.h" #include "api.h"
#include "ascon.h" #include "ascon.h"
#include "config.h"
#include "printstate.h" #include "printstate.h"
#include "round.h" #include "round.h"
...@@ -14,11 +15,20 @@ ...@@ -14,11 +15,20 @@
#define ASCON_128_RATE 8 #define ASCON_128_RATE 8
#define ASCON_128A_RATE 16 #define ASCON_128A_RATE 16
#define ASCON_HASH_RATE 8
#define ASCON_128_PA_ROUNDS 12 #define ASCON_128_PA_ROUNDS 12
#define ASCON_128_PB_ROUNDS 6 #define ASCON_128_PB_ROUNDS 6
#define ASCON_128A_PA_ROUNDS 12
#define ASCON_128A_PB_ROUNDS 8 #define ASCON_128A_PB_ROUNDS 8
#define ASCON_HASH_PA_ROUNDS 12
#define ASCON_HASH_PB_ROUNDS 12
#define ASCON_HASHA_PA_ROUNDS 12
#define ASCON_HASHA_PB_ROUNDS 8
#define ASCON_HASH_BYTES 32 #define ASCON_HASH_BYTES 32
#define ASCON_128_IV WORD_T(0x8040000020301000ull) #define ASCON_128_IV WORD_T(0x8040000020301000ull)
...@@ -33,132 +43,95 @@ ...@@ -33,132 +43,95 @@
#define ASCON_HASH_IV3 WORD_T(0x2f871f6c6d0082b2ull) #define ASCON_HASH_IV3 WORD_T(0x2f871f6c6d0082b2ull)
#define ASCON_HASH_IV4 WORD_T(0x7a1ba68850ec407eull) #define ASCON_HASH_IV4 WORD_T(0x7a1ba68850ec407eull)
#define ASCON_HASHA_IV0 WORD_T(0x194c0f180a5d41e4ull)
#define ASCON_HASHA_IV1 WORD_T(0x7faa87825647f3a7ull)
#define ASCON_HASHA_IV2 WORD_T(0x606dbe06db8da430ull)
#define ASCON_HASHA_IV3 WORD_T(0xe0dd6bcf19fbce3bull)
#define ASCON_HASHA_IV4 WORD_T(0x9720dc4446473d8bull)
#define ASCON_XOF_IV0 WORD_T(0x8a46f0d354e771b8ull) #define ASCON_XOF_IV0 WORD_T(0x8a46f0d354e771b8ull)
#define ASCON_XOF_IV1 WORD_T(0x04489f4084368cd0ull) #define ASCON_XOF_IV1 WORD_T(0x04489f4084368cd0ull)
#define ASCON_XOF_IV2 WORD_T(0x6c94f2150dbcf66cull) #define ASCON_XOF_IV2 WORD_T(0x6c94f2150dbcf66cull)
#define ASCON_XOF_IV3 WORD_T(0x48965294f143b44eull) #define ASCON_XOF_IV3 WORD_T(0x48965294f143b44eull)
#define ASCON_XOF_IV4 WORD_T(0x0788515fe0e5fb8aull) #define ASCON_XOF_IV4 WORD_T(0x0788515fe0e5fb8aull)
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 #define ASCON_XOFA_IV0 WORD_T(0x4ab43d4f16a80d2cull)
#define IV ASCON_128_IV #define ASCON_XOFA_IV1 WORD_T(0xd0ae310bf0f619ceull)
#define PA_ROUNDS 12 #define ASCON_XOFA_IV2 WORD_T(0xc08cf3c801d89cf3ull)
#define PB_ROUNDS 6 #define ASCON_XOFA_IV3 WORD_T(0x3859d2094dac0b35ull)
#define PB P6 #define ASCON_XOFA_IV4 WORD_T(0xd274992be52b5357ull)
#endif
#if ASCON_RATE == 16
#define IV ASCON_128A_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 8
#define PB P8
#endif
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20
#define IV ASCON_80PQ_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 6
#define PB P6
#endif
#define START(n) (12 - n) #define START(n) (12 - n)
#define RC(c) WORD_T(c)
#if ASCON_UNROLL_LOOPS
forceinline void P12ROUNDS(state_t* s) {
__forceinline void P12ROUNDS(state_t* s) { ROUND(s, RC(0x0101010100000000ull));
ROUND(s, 0x0101010100000000ull); ROUND(s, RC(0x0101010000000001ull));
ROUND(s, 0x0101010000000001ull); ROUND(s, RC(0x0101000100000100ull));
ROUND(s, 0x0101000100000100ull); ROUND(s, RC(0x0101000000000101ull));
ROUND(s, 0x0101000000000101ull); ROUND(s, RC(0x0100010100010000ull));
ROUND(s, 0x0100010100010000ull); ROUND(s, RC(0x0100010000010001ull));
ROUND(s, 0x0100010000010001ull); ROUND(s, RC(0x0100000100010100ull));
ROUND(s, 0x0100000100010100ull); ROUND(s, RC(0x0100000000010101ull));
ROUND(s, 0x0100000000010101ull); ROUND(s, RC(0x0001010101000000ull));
ROUND(s, 0x0001010101000000ull); ROUND(s, RC(0x0001010001000001ull));
ROUND(s, 0x0001010001000001ull); ROUND(s, RC(0x0001000101000100ull));
ROUND(s, 0x0001000101000100ull); ROUND(s, RC(0x0001000001000101ull));
ROUND(s, 0x0001000001000101ull);
} }
__forceinline void P8ROUNDS(state_t* s) { forceinline void P8ROUNDS(state_t* s) {
ROUND(s, 0x0100010100010000ull); ROUND(s, RC(0x0100010100010000ull));
ROUND(s, 0x0100010000010001ull); ROUND(s, RC(0x0100010000010001ull));
ROUND(s, 0x0100000100010100ull); ROUND(s, RC(0x0100000100010100ull));
ROUND(s, 0x0100000000010101ull); ROUND(s, RC(0x0100000000010101ull));
ROUND(s, 0x0001010101000000ull); ROUND(s, RC(0x0001010101000000ull));
ROUND(s, 0x0001010001000001ull); ROUND(s, RC(0x0001010001000001ull));
ROUND(s, 0x0001000101000100ull); ROUND(s, RC(0x0001000101000100ull));
ROUND(s, 0x0001000001000101ull); ROUND(s, RC(0x0001000001000101ull));
} }
__forceinline void P6ROUNDS(state_t* s) { forceinline void P6ROUNDS(state_t* s) {
ROUND(s, 0x0100000100010100ull); ROUND(s, RC(0x0100000100010100ull));
ROUND(s, 0x0100000000010101ull); ROUND(s, RC(0x0100000000010101ull));
ROUND(s, 0x0001010101000000ull); ROUND(s, RC(0x0001010101000000ull));
ROUND(s, 0x0001010001000001ull); ROUND(s, RC(0x0001010001000001ull));
ROUND(s, 0x0001000101000100ull); ROUND(s, RC(0x0001000101000100ull));
ROUND(s, 0x0001000001000101ull); ROUND(s, RC(0x0001000001000101ull));
} }
#else /* !ASCON_UNROLL_LOOPS */
extern const uint64_t constants[12]; extern const uint64_t constants[12];
__forceinline void P12ROUNDS(state_t* s) { forceinline void PROUNDS(state_t* s, int nr) {
for (int i = START(12); i < 12; ++i) ROUND(s, constants[i]); for (int i = START(nr); i < 12; i++) ROUND(s, RC(constants[i]));
}
__forceinline void P8ROUNDS(state_t* s) {
for (int i = START(8); i < 12; ++i) ROUND(s, constants[i]);
}
__forceinline void P6ROUNDS(state_t* s) {
for (int i = START(6); i < 12; ++i) ROUND(s, constants[i]);
} }
#endif #if ASCON_INLINE_PERM && ASCON_UNROLL_LOOPS
#if ASCON_INLINE_PERM
__forceinline void P12(state_t* s) { forceinline void P(state_t* s, int nr) {
printstate(" permutation input", s); if (nr == 12) P12ROUNDS(s);
P12ROUNDS(s); if (nr == 8) P8ROUNDS(s);
if (nr == 6) P6ROUNDS(s);
} }
__forceinline void P8(state_t* s) { #elif !ASCON_INLINE_PERM && ASCON_UNROLL_LOOPS
printstate(" permutation input", s);
P8ROUNDS(s);
}
__forceinline void P6(state_t* s) { void P12(state_t* s);
printstate(" permutation input", s); void P8(state_t* s);
P6ROUNDS(s); void P6(state_t* s);
}
__forceinline void P(state_t* s, int i) { forceinline void P(state_t* s, int nr) {
if (i == 12) P12(s); if (nr == 12) P12(s);
if (i == 8) P8(s); if (nr == 8) P8(s);
if (i == 6) P6(s); if (nr == 6) P6(s);
} }
#elif ASCON_SINGLE_PERM #elif ASCON_INLINE_PERM && !ASCON_UNROLL_LOOPS
#define P12(s) P(s, 12)
#define P8(s) P(s, 8)
#define P6(s) P(s, 6)
void P(state_t* s, uint8_t rounds); forceinline void P(state_t* s, int nr) { PROUNDS(s, nr); }
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ #else /* !ASCON_INLINE_PERM && !ASCON_UNROLL_LOOPS */
void P12(state_t* s); void P(state_t* s, int nr);
void P8(state_t* s);
void P6(state_t* s);
__forceinline void P(state_t* s, int i) {
if (i == 12) P12(s);
if (i == 8) P8(s);
if (i == 6) P6(s);
}
#endif #endif
......
#ifndef PRINTSTATE_H_ #ifndef PRINTSTATE_H_
#define PRINTSTATE_H_ #define PRINTSTATE_H_
#ifdef NDEBUG #ifdef ASCON_PRINTSTATE
#define printword(text, w) #include "ascon.h"
#define printstate(text, s) #include "word.h"
#else void printword(const char* text, const word_t x);
void printstate(const char* text, const state_t* s);
#include <inttypes.h> #else
#include <stdio.h>
#include "ascon.h" #define printword(text, w) \
#include "word.h" do { \
} while (0)
__forceinline void printword(const char* text, const word_t x) { #define printstate(text, s) \
printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); do { \
} } while (0)
__forceinline void printstate(const char* text, const state_t* s) {
printf("%s:\n", text);
printword(" x0", s->x0);
printword(" x1", s->x1);
printword(" x2", s->x2);
printword(" x3", s->x3);
printword(" x4", s->x4);
}
#endif #endif
......
...@@ -4,13 +4,13 @@ ...@@ -4,13 +4,13 @@
#include "ascon.h" #include "ascon.h"
#include "printstate.h" #include "printstate.h"
__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) {
*K0 = WORD_T(0); *K0 = WORD_T(0);
*K1 = WORD_T(0); *K1 = WORD_T(0);
*K2 = WORD_T(0); *K2 = WORD_T(0);
} }
__forceinline void PINIT(state_t* s) { forceinline void PINIT(state_t* s) {
s->x0 = WORD_T(0); s->x0 = WORD_T(0);
s->x1 = WORD_T(0); s->x1 = WORD_T(0);
s->x2 = WORD_T(0); s->x2 = WORD_T(0);
...@@ -18,34 +18,34 @@ __forceinline void PINIT(state_t* s) { ...@@ -18,34 +18,34 @@ __forceinline void PINIT(state_t* s) {
s->x4 = WORD_T(0); s->x4 = WORD_T(0);
} }
__forceinline void ROUND(state_t* s, uint64_t C) { forceinline void ROUND(state_t* s, word_t C) {
word_t tmp; word_t xtemp;
/* round constant */ /* round constant */
s->x2 = XOR(s->x2, WORD_T(C)); s->x2 = XOR(s->x2, C);
/* s-box layer */ /* s-box layer */
s->x0 = XOR(s->x0, s->x4); s->x0 = XOR(s->x0, s->x4);
s->x4 = XOR(s->x4, s->x3); s->x4 = XOR(s->x4, s->x3);
s->x2 = XOR(s->x2, s->x1); s->x2 = XOR(s->x2, s->x1);
tmp = AND(s->x0, NOT(s->x4)); xtemp = AND(s->x0, NOT(s->x4));
s->x0 = XOR(s->x0, AND(s->x2, NOT(s->x1))); s->x0 = XOR(s->x0, AND(s->x2, NOT(s->x1)));
s->x2 = XOR(s->x2, AND(s->x4, NOT(s->x3))); s->x2 = XOR(s->x2, AND(s->x4, NOT(s->x3)));
s->x4 = XOR(s->x4, AND(s->x1, NOT(s->x0))); s->x4 = XOR(s->x4, AND(s->x1, NOT(s->x0)));
s->x1 = XOR(s->x1, AND(s->x3, NOT(s->x2))); s->x1 = XOR(s->x1, AND(s->x3, NOT(s->x2)));
s->x3 = XOR(s->x3, tmp); s->x3 = XOR(s->x3, xtemp);
s->x1 = XOR(s->x1, s->x0); s->x1 = XOR(s->x1, s->x0);
s->x3 = XOR(s->x3, s->x2); s->x3 = XOR(s->x3, s->x2);
s->x0 = XOR(s->x0, s->x4); s->x0 = XOR(s->x0, s->x4);
/* linear layer */ /* linear layer */
tmp = XOR(s->x0, ROR64(s->x0, 28 - 19)); xtemp = XOR(s->x0, ROR(s->x0, 28 - 19));
s->x0 = XOR(s->x0, ROR64(tmp, 19)); s->x0 = XOR(s->x0, ROR(xtemp, 19));
tmp = XOR(s->x1, ROR64(s->x1, 61 - 39)); xtemp = XOR(s->x1, ROR(s->x1, 61 - 39));
s->x1 = XOR(s->x1, ROR64(tmp, 39)); s->x1 = XOR(s->x1, ROR(xtemp, 39));
tmp = XOR(s->x2, ROR64(s->x2, 6 - 1)); xtemp = XOR(s->x2, ROR(s->x2, 6 - 1));
s->x2 = XOR(s->x2, ROR64(tmp, 1)); s->x2 = XOR(s->x2, ROR(xtemp, 1));
tmp = XOR(s->x3, ROR64(s->x3, 17 - 10)); xtemp = XOR(s->x3, ROR(s->x3, 17 - 10));
s->x3 = XOR(s->x3, ROR64(tmp, 10)); s->x3 = XOR(s->x3, ROR(xtemp, 10));
tmp = XOR(s->x4, ROR64(s->x4, 41 - 7)); xtemp = XOR(s->x4, ROR(s->x4, 41 - 7));
s->x4 = XOR(s->x4, ROR64(tmp, 7)); s->x4 = XOR(s->x4, ROR(xtemp, 7));
s->x2 = NOT(s->x2); s->x2 = NOT(s->x2);
printstate(" round output", s); printstate(" round output", s);
} }
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
#include <stdint.h> #include <stdint.h>
#include "endian.h" #include "endian.h"
#include "forceinline.h"
#include "interleave.h" #include "interleave.h"
typedef union { typedef union {
...@@ -11,9 +12,9 @@ typedef union { ...@@ -11,9 +12,9 @@ typedef union {
uint8_t b[8]; uint8_t b[8];
} word_t; } word_t;
__forceinline uint8_t ROR8(uint8_t a, int n) { return a >> n | a << (8 - n); } forceinline uint8_t ROR8(uint8_t a, int n) { return a >> n | a << (8 - n); }
__forceinline word_t ROR64(word_t a, int n) { forceinline word_t ROR(word_t a, int n) {
word_t b; word_t b;
b.b[0] = ROR8(a.b[(n + 0) & 0x7], (n + 0) >> 3); b.b[0] = ROR8(a.b[(n + 0) & 0x7], (n + 0) >> 3);
b.b[1] = ROR8(a.b[(n + 1) & 0x7], (n + 1) >> 3); b.b[1] = ROR8(a.b[(n + 1) & 0x7], (n + 1) >> 3);
...@@ -26,54 +27,54 @@ __forceinline word_t ROR64(word_t a, int n) { ...@@ -26,54 +27,54 @@ __forceinline word_t ROR64(word_t a, int n) {
return b; return b;
} }
__forceinline word_t WORD_T(uint64_t x) { forceinline word_t WORD_T(uint64_t x) {
word_t w; word_t w;
w.w = x; w.w = x;
return w; return w;
} }
__forceinline uint64_t UINT64_T(word_t w) { forceinline uint64_t UINT64_T(word_t w) {
uint64_t x; uint64_t x;
x = w.w; x = w.w;
return x; return x;
} }
__forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(interleave8(x)); } forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(interleave8(x)); }
__forceinline uint64_t WORDTOU64(word_t w) { return interleave8(UINT64_T(w)); } forceinline uint64_t WORDTOU64(word_t w) { return interleave8(UINT64_T(w)); }
__forceinline word_t NOT(word_t a) { forceinline word_t NOT(word_t a) {
a.w = ~a.w; a.w = ~a.w;
return a; return a;
} }
__forceinline word_t XOR(word_t a, word_t b) { forceinline word_t XOR(word_t a, word_t b) {
a.w ^= b.w; a.w ^= b.w;
return a; return a;
} }
__forceinline word_t AND(word_t a, word_t b) { forceinline word_t AND(word_t a, word_t b) {
a.w &= b.w; a.w &= b.w;
return a; return a;
} }
__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) {
word_t w; word_t w;
w.w = lo2hi.w << 32 | hi2lo.w >> 32; w.w = lo2hi.w << 32 | hi2lo.w >> 32;
return w; return w;
} }
__forceinline uint8_t NOTZERO(word_t a, word_t b) { forceinline int NOTZERO(word_t a, word_t b) {
uint64_t result = a.w | b.w; uint64_t result = a.w | b.w;
result |= result >> 32; result |= result >> 32;
result |= result >> 16; result |= result >> 16;
result |= result >> 8; result |= result >> 8;
return (uint8_t)result; return ((((int)(result & 0xff) - 1) >> 8) & 1) - 1;
} }
__forceinline word_t PAD(int i) { return (word_t){.b[7] = 0x80 >> i}; } forceinline word_t PAD(int i) { return (word_t){.b[7] = 0x80 >> i}; }
__forceinline word_t CLEAR(word_t w, int n) { forceinline word_t CLEAR(word_t w, int n) {
/* undefined for n == 0 */ /* undefined for n == 0 */
uint8_t m = 0xff >> n; uint8_t m = 0xff >> n;
word_t mask = { word_t mask = {
...@@ -89,39 +90,29 @@ __forceinline word_t CLEAR(word_t w, int n) { ...@@ -89,39 +90,29 @@ __forceinline word_t CLEAR(word_t w, int n) {
return AND(w, mask); return AND(w, mask);
} }
__forceinline uint64_t MASK(int n) { forceinline uint64_t MASK(int n) {
/* undefined for n == 0 */ /* undefined for n == 0 */
return ~0ull >> (64 - 8 * n); return ~0ull >> (64 - 8 * n);
} }
__forceinline word_t LOAD64(const uint8_t* bytes) { forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = *(uint64_t*)bytes;
return U64TOWORD(U64BIG(x));
}
__forceinline void STORE64(uint8_t* bytes, word_t w) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes = U64BIG(x);
}
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = *(uint64_t*)bytes & MASK(n); uint64_t x = *(uint64_t*)bytes & MASK(n);
return U64TOWORD(U64BIG(x)); return U64TOWORD(U64BIG(x));
} }
__forceinline void STORE(uint8_t* bytes, word_t w, int n) { forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w); uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes &= ~MASK(n); *(uint64_t*)bytes &= ~MASK(n);
*(uint64_t*)bytes |= U64BIG(x); *(uint64_t*)bytes |= U64BIG(x);
} }
__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) { forceinline word_t LOADBYTES(const uint8_t* bytes, int n) {
uint64_t x = 0; uint64_t x = 0;
for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i]; for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i];
return U64TOWORD(x); return U64TOWORD(x);
} }
__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) { forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w); uint64_t x = WORDTOU64(w);
for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i]; for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i];
} }
......
...@@ -121,15 +121,19 @@ ...@@ -121,15 +121,19 @@
forceinline void ascon_loadkey(word_t* K0, word_t* K1, word_t* K2, forceinline void ascon_loadkey(word_t* K0, word_t* K1, word_t* K2,
const uint8_t* k) { const uint8_t* k) {
KINIT(K0, K1, K2); KINIT(K0, K1, K2);
if (CRYPTO_KEYBYTES == 16) {
*K1 = XOR(*K1, LOAD(k, 8));
*K2 = XOR(*K2, LOAD(k + 8, 8));
}
if (CRYPTO_KEYBYTES == 20) { if (CRYPTO_KEYBYTES == 20) {
*K0 = XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4))); *K0 = XOR(*K0, KEYROT(WORD_T(0), LOADBYTES(k, 4)));
k += 4; *K1 = XOR(*K1, LOADBYTES(k + 4, 8));
*K2 = XOR(*K2, LOADBYTES(k + 12, 8));
} }
*K1 = XOR(*K1, LOAD(k, 8));
*K2 = XOR(*K2, LOAD(k + 8, 8));
} }
forceinline void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) { forceinline void ascon_aeadinit(state_t* s, const uint8_t* npub,
const uint8_t* k) {
/* load nonce */ /* load nonce */
word_t N0 = LOAD(npub, 8); word_t N0 = LOAD(npub, 8);
word_t N1 = LOAD(npub + 8, 8); word_t N1 = LOAD(npub + 8, 8);
...@@ -138,9 +142,9 @@ forceinline void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) { ...@@ -138,9 +142,9 @@ forceinline void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) {
ascon_loadkey(&K0, &K1, &K2, k); ascon_loadkey(&K0, &K1, &K2, k);
/* initialize */ /* initialize */
PINIT(s); PINIT(s);
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) if (CRYPTO_KEYBYTES == 16 && ASCON_AEAD_RATE == 8)
s->x0 = XOR(s->x0, ASCON_128_IV); s->x0 = XOR(s->x0, ASCON_128_IV);
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) if (CRYPTO_KEYBYTES == 16 && ASCON_AEAD_RATE == 16)
s->x0 = XOR(s->x0, ASCON_128A_IV); s->x0 = XOR(s->x0, ASCON_128A_IV);
if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, ASCON_80PQ_IV); if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, ASCON_80PQ_IV);
if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, K0); if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, K0);
...@@ -156,13 +160,13 @@ forceinline void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) { ...@@ -156,13 +160,13 @@ forceinline void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) {
} }
forceinline void ascon_adata(state_t* s, const uint8_t* ad, uint64_t adlen) { forceinline void ascon_adata(state_t* s, const uint8_t* ad, uint64_t adlen) {
const int nr = (ASCON_RATE == 8) ? 6 : 8; const int nr = (ASCON_AEAD_RATE == 8) ? 6 : 8;
if (adlen) { if (adlen) {
/* full associated data blocks */ /* full associated data blocks */
AD(); AD();
/* final associated data block */ /* final associated data block */
word_t* px = &s->x0; word_t* px = &s->x0;
if (ASCON_RATE == 16 && adlen >= 8) { if (ASCON_AEAD_RATE == 16 && adlen >= 8) {
s->x0 = XOR(s->x0, LOAD(ad, 8)); s->x0 = XOR(s->x0, LOAD(ad, 8));
px = &s->x1; px = &s->x1;
ad += 8; ad += 8;
...@@ -179,12 +183,12 @@ forceinline void ascon_adata(state_t* s, const uint8_t* ad, uint64_t adlen) { ...@@ -179,12 +183,12 @@ forceinline void ascon_adata(state_t* s, const uint8_t* ad, uint64_t adlen) {
forceinline void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, forceinline void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m,
uint64_t mlen) { uint64_t mlen) {
const int nr = (ASCON_RATE == 8) ? 6 : 8; const int nr = (ASCON_AEAD_RATE == 8) ? 6 : 8;
/* full plaintext blocks */ /* full plaintext blocks */
PT(); PT();
/* final plaintext block */ /* final plaintext block */
word_t* px = &s->x0; word_t* px = &s->x0;
if (ASCON_RATE == 16 && mlen >= 8) { if (ASCON_AEAD_RATE == 16 && mlen >= 8) {
s->x0 = XOR(s->x0, LOAD(m, 8)); s->x0 = XOR(s->x0, LOAD(m, 8));
STORE(c, s->x0, 8); STORE(c, s->x0, 8);
px = &s->x1; px = &s->x1;
...@@ -202,12 +206,12 @@ forceinline void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, ...@@ -202,12 +206,12 @@ forceinline void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m,
forceinline void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, forceinline void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c,
uint64_t clen) { uint64_t clen) {
const int nr = (ASCON_RATE == 8) ? 6 : 8; const int nr = (ASCON_AEAD_RATE == 8) ? 6 : 8;
/* full ciphertext blocks */ /* full ciphertext blocks */
CT(); CT();
/* final ciphertext block */ /* final ciphertext block */
word_t* px = &s->x0; word_t* px = &s->x0;
if (ASCON_RATE == 16 && clen >= 8) { if (ASCON_AEAD_RATE == 16 && clen >= 8) {
word_t cx = LOAD(c, 8); word_t cx = LOAD(c, 8);
s->x0 = XOR(s->x0, cx); s->x0 = XOR(s->x0, cx);
STORE(m, s->x0, 8); STORE(m, s->x0, 8);
...@@ -233,11 +237,11 @@ forceinline void ascon_final(state_t* s, const uint8_t* k) { ...@@ -233,11 +237,11 @@ forceinline void ascon_final(state_t* s, const uint8_t* k) {
word_t K0, K1, K2; word_t K0, K1, K2;
ascon_loadkey(&K0, &K1, &K2, k); ascon_loadkey(&K0, &K1, &K2, k);
/* finalize */ /* finalize */
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) { if (CRYPTO_KEYBYTES == 16 && ASCON_AEAD_RATE == 8) {
s->x1 = XOR(s->x1, K1); s->x1 = XOR(s->x1, K1);
s->x2 = XOR(s->x2, K2); s->x2 = XOR(s->x2, K2);
} }
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) { if (CRYPTO_KEYBYTES == 16 && ASCON_AEAD_RATE == 16) {
s->x2 = XOR(s->x2, K1); s->x2 = XOR(s->x2, K1);
s->x3 = XOR(s->x3, K2); s->x3 = XOR(s->x3, K2);
} }
...@@ -261,7 +265,7 @@ int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen, ...@@ -261,7 +265,7 @@ int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen,
(void)nsec; (void)nsec;
*clen = mlen + CRYPTO_ABYTES; *clen = mlen + CRYPTO_ABYTES;
/* perform ascon computation */ /* perform ascon computation */
ascon_init(&s, npub, k); ascon_aeadinit(&s, npub, k);
ascon_adata(&s, ad, adlen); ascon_adata(&s, ad, adlen);
ascon_encrypt(&s, c, m, mlen); ascon_encrypt(&s, c, m, mlen);
ascon_final(&s, k); ascon_final(&s, k);
...@@ -281,7 +285,7 @@ int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen, ...@@ -281,7 +285,7 @@ int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen,
if (clen < CRYPTO_ABYTES) return -1; if (clen < CRYPTO_ABYTES) return -1;
*mlen = clen = clen - CRYPTO_ABYTES; *mlen = clen = clen - CRYPTO_ABYTES;
/* perform ascon computation */ /* perform ascon computation */
ascon_init(&s, npub, k); ascon_aeadinit(&s, npub, k);
ascon_adata(&s, ad, adlen); ascon_adata(&s, ad, adlen);
ascon_decrypt(&s, m, c, clen); ascon_decrypt(&s, m, c, clen);
ascon_final(&s, k); ascon_final(&s, k);
......
#define CRYPTO_VERSION "1.2.5"
#define CRYPTO_KEYBYTES 16 #define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0 #define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16 #define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16 #define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1 #define CRYPTO_NOOVERLAP 1
#define ASCON_RATE 16 #define ASCON_AEAD_RATE 16
#ifndef CONFIG_H_ #ifndef CONFIG_H_
#define CONFIG_H_ #define CONFIG_H_
/* inline the Ascon mode */ /* inline the ascon mode */
#ifndef ASCON_INLINE_MODE #ifndef ASCON_INLINE_MODE
#define ASCON_INLINE_MODE 1 #define ASCON_INLINE_MODE 0
#endif #endif
/* inline the Ascon permutations */ /* inline all permutations */
#ifndef ASCON_INLINE_PERM #ifndef ASCON_INLINE_PERM
#define ASCON_INLINE_PERM 1 #define ASCON_INLINE_PERM 1
#endif #endif
/* single function for all permutations */ /* unroll permutation loops */
#ifndef ASCON_SINGLE_PERM
#define ASCON_SINGLE_PERM 0
#endif
/* unroll the permutation loops */
#ifndef ASCON_UNROLL_LOOPS #ifndef ASCON_UNROLL_LOOPS
#define ASCON_UNROLL_LOOPS 1 #define ASCON_UNROLL_LOOPS 1
#endif #endif
/* make sure __forceinline is supported */
#ifndef __forceinline
#define __forceinline inline __attribute__((always_inline))
#endif
#endif /* CONFIG_H_ */ #endif /* CONFIG_H_ */
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
/* macros for big endian machines */ /* macros for big endian machines */
#ifndef NDEBUG #ifdef PRAGMA_ENDIAN
#pragma message("Using macros for big endian machines") #pragma message("Using macros for big endian machines")
#endif #endif
#define U64BIG(x) (x) #define U64BIG(x) (x)
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
/* macros for little endian machines */ /* macros for little endian machines */
#ifndef NDEBUG #ifdef PRAGMA_ENDIAN
#pragma message("Using macros for little endian machines") #pragma message("Using macros for little endian machines")
#endif #endif
#define U64BIG(x) \ #define U64BIG(x) \
......
#ifndef PERMUTATIONS_H_ #ifndef PERMUTATIONS_H_
#define PERMUTATIONS_H_ #define PERMUTATIONS_H_
#include <stdint.h>
#include "api.h"
#include "ascon.h" #include "ascon.h"
#include "config.h" #include "config.h"
#include "printstate.h"
#include "round.h" #include "round.h"
static const uint64_t C[12] = { #define ASCON_128_KEYBYTES 16
#define ASCON_128A_KEYBYTES 16
#define ASCON_80PQ_KEYBYTES 20
#define ASCON_128_RATE 8
#define ASCON_128A_RATE 16
#define ASCON_128_PA_ROUNDS 12
#define ASCON_128_PB_ROUNDS 6
#define ASCON_128A_PB_ROUNDS 8
#define ASCON_HASH_BYTES 32
#define ASCON_128_IV WORD_T(0x80400c0600000000ull)
#define ASCON_128A_IV WORD_T(0x80800c0800000000ull)
#define ASCON_80PQ_IV WORD_T(0xa0400c0600000000ull)
#define ASCON_HASH_IV WORD_T(0x00400c0000000100ull)
#define ASCON_XOF_IV WORD_T(0x00400c0000000000ull)
#define ASCON_HASH_IV0 WORD_T(0xee9398aadb67f03dull)
#define ASCON_HASH_IV1 WORD_T(0x8bb21831c60f1002ull)
#define ASCON_HASH_IV2 WORD_T(0xb48a92db98d5da62ull)
#define ASCON_HASH_IV3 WORD_T(0x43189921b8f8e3e8ull)
#define ASCON_HASH_IV4 WORD_T(0x348fa5c9d525e140ull)
#define ASCON_XOF_IV0 WORD_T(0xb57e273b814cd416ull)
#define ASCON_XOF_IV1 WORD_T(0x2b51042562ae2420ull)
#define ASCON_XOF_IV2 WORD_T(0x66a3a7768ddf2218ull)
#define ASCON_XOF_IV3 WORD_T(0x5aad0a7a8153650cull)
#define ASCON_XOF_IV4 WORD_T(0x4f3e0e32539493b6ull)
#define START(n) ((3 + (n)) << 4 | (12 - (n)))
#define RC(c) WORD_T(c)
const uint64_t C[12] = {
0xffffffffffffff0full, 0xffffffffffffff1eull, 0xffffffffffffff2dull, 0xffffffffffffff0full, 0xffffffffffffff1eull, 0xffffffffffffff2dull,
0xffffffffffffff3cull, 0xffffffffffffff4bull, 0xffffffffffffff5aull, 0xffffffffffffff3cull, 0xffffffffffffff4bull, 0xffffffffffffff5aull,
0xffffffffffffff69ull, 0xffffffffffffff78ull, 0xffffffffffffff87ull, 0xffffffffffffff69ull, 0xffffffffffffff78ull, 0xffffffffffffff87ull,
0xffffffffffffff96ull, 0xffffffffffffffa5ull, 0xffffffffffffffb4ull, 0xffffffffffffff96ull, 0xffffffffffffffa5ull, 0xffffffffffffffb4ull,
}; };
#define P12() \ #define P12ROUNDS(s) \
ROUND(0) \
ROUND(8) \
ROUND(16) \
ROUND(24) \
ROUND(32) \
ROUND(40) \
ROUND(48) \
ROUND(56) \
ROUND(64) \
ROUND(72) \
ROUND(80) \
ROUND(88)
#define P8ROUNDS(s) \
ROUND(32) \
ROUND(40) \
ROUND(48) \
ROUND(56) \
ROUND(64) \
ROUND(72) \
ROUND(80) \
ROUND(88)
#define P6ROUNDS(s) \
ROUND(48) \
ROUND(56) \
ROUND(64) \
ROUND(72) \
ROUND(80) \
ROUND(88)
forceinline void P12(state_t* s) {
__asm__ __volatile__ ( \ __asm__ __volatile__ ( \
".arm \n\t" \ ".arm \n\t" \
".fpu neon \n\t" \ ".fpu neon \n\t" \
"vldm %[s], {d0-d4} \n\t" \ "vldm %[s], {d0-d4} \n\t" \
"vmvn d2, d2 \n\t" \ "vmvn d2, d2 \n\t" \
ROUND(0) \ P12ROUNDS(s) \
ROUND(8) \
ROUND(16) \
ROUND(24) \
ROUND(32) \
ROUND(40) \
ROUND(48) \
ROUND(56) \
ROUND(64) \
ROUND(72) \
ROUND(80) \
ROUND(88) \
"vmvn d2, d2 \n\t" \ "vmvn d2, d2 \n\t" \
"vstm %[s], {d0-d4} \n\t" \ "vstm %[s], {d0-d4} \n\t" \
:: [s] "r" (&s), [C] "r" (C) \ :: [s] "r" (s), [C] "r" (C) \
: "d0", "d1", "d2", "d3", "d4", \ : "d0", "d1", "d2", "d3", "d4", \
"d10", "d11", "d12", "d13", "d14", \ "d10", "d11", "d12", "d13", "d14", \
"d20", "d21", "d22", "d23", "d24", \ "d20", "d21", "d22", "d23", "d24", \
"d31", "memory") "d31", "memory");
}
#define P8() \ forceinline void P8(state_t* s) {
__asm__ __volatile__ ( \ __asm__ __volatile__ ( \
".arm \n\t" \ ".arm \n\t" \
".fpu neon \n\t" \ ".fpu neon \n\t" \
"vldm %[s], {d0-d4} \n\t" \ "vldm %[s], {d0-d4} \n\t" \
"vmvn d2, d2 \n\t" \ "vmvn d2, d2 \n\t" \
ROUND(32) \ P8ROUNDS(s) \
ROUND(40) \
ROUND(48) \
ROUND(56) \
ROUND(64) \
ROUND(72) \
ROUND(80) \
ROUND(88) \
"vmvn d2, d2 \n\t" \ "vmvn d2, d2 \n\t" \
"vstm %[s], {d0-d4} \n\t" \ "vstm %[s], {d0-d4} \n\t" \
:: [s] "r" (&s), [C] "r" (C) \ :: [s] "r" (s), [C] "r" (C) \
: "d0", "d1", "d2", "d3", "d4", \ : "d0", "d1", "d2", "d3", "d4", \
"d10", "d11", "d12", "d13", "d14", \ "d10", "d11", "d12", "d13", "d14", \
"d20", "d21", "d22", "d23", "d24", \ "d20", "d21", "d22", "d23", "d24", \
"d31", "memory") "d31", "memory");
}
#define AD() \
do { \ forceinline void P6(state_t* s) {
uint32_t adlen_hi = (uint32_t)(adlen >> 32); \ __asm__ __volatile__ ( \
uint32_t adlen_lo = (uint32_t)adlen; \ ".arm \n\t" \
__asm__ __volatile__ ( \ ".fpu neon \n\t" \
".arm \n\t" \ "vldm %[s], {d0-d4} \n\t" \
".fpu neon \n\t" \ "vmvn d2, d2 \n\t" \
"cmp %[adlen_hi], #0 \n\t" \ P6ROUNDS(s) \
"cmpeq %[adlen_lo], #15 \n\t" \ "vmvn d2, d2 \n\t" \
"bls .LAD1 \n\t" \ "vstm %[s], {d0-d4} \n\t" \
"vldm %[s], {d0-d4} \n\t" \ :: [s] "r" (s), [C] "r" (C) \
".LAD0: \n\t" \ : "d0", "d1", "d2", "d3", "d4", \
"vldm %[ad]!, {d16,d17} \n\t" \ "d10", "d11", "d12", "d13", "d14", \
"vrev64.8 q8, q8 \n\t" \ "d20", "d21", "d22", "d23", "d24", \
"veor q0, q0, q8 \n\t" \ "d31", "memory");
"vmvn d2, d2 \n\t" \ }
ROUND(32) \
ROUND(40) \ forceinline void P(state_t* s, int nr) {
ROUND(48) \ if (nr == 12) P12(s);
ROUND(56) \ if (nr == 8) P8(s);
ROUND(64) \ if (nr == 6) P6(s);
ROUND(72) \ }
ROUND(80) \
ROUND(88) \
"vmvn d2, d2 \n\t" \
"subs %[adlen_lo], %[adlen_lo], #16 \n\t" \
"sbc %[adlen_hi], %[adlen_hi], #0 \n\t" \
"cmp %[adlen_hi], #0 \n\t" \
"cmpeq %[adlen_lo], #15 \n\t" \
"bhi .LAD0 \n\t" \
"vstm %[s], {d0-d4} \n\t" \
".LAD1: \n\t" \
: [adlen_hi] "+r" (adlen_hi), [adlen_lo] "+r" (adlen_lo), \
[ad] "+r" (ad) \
: [s] "r" (&s), [C] "r" (C) \
: "d0", "d1", "d2", "d3", "d4", \
"d10", "d11", "d12", "d13", "d14", "d16", "d17", \
"d20", "d21", "d22", "d23", "d24", \
"d31", "memory"); \
adlen = (uint64_t)adlen_hi << 32 | adlen_lo; \
} while (0)
#define PT() \
do { \
uint32_t mlen_hi = (uint32_t)(mlen >> 32); \
uint32_t mlen_lo = (uint32_t)mlen; \
__asm__ __volatile__ ( \
".arm \n\t" \
".fpu neon \n\t" \
"cmp %[mlen_hi], #0 \n\t" \
"cmpeq %[mlen_lo], #15 \n\t" \
"bls .LPT1 \n\t" \
"vldm %[s], {d0-d4} \n\t" \
".LPT0: \n\t" \
"vldm %[m]!, {d16,d17} \n\t" \
"vrev64.8 q8, q8 \n\t" \
"veor q0, q0, q8 \n\t" \
"vrev64.8 q13, q0 \n\t" \
"vstm %[c]!, {d26,d27} \n\t" \
"vmvn d2, d2 \n\t" \
ROUND(32) \
ROUND(40) \
ROUND(48) \
ROUND(56) \
ROUND(64) \
ROUND(72) \
ROUND(80) \
ROUND(88) \
"vmvn d2, d2 \n\t" \
"subs %[mlen_lo], %[mlen_lo], #16 \n\t" \
"sbc %[mlen_hi], %[mlen_hi], #0 \n\t" \
"cmp %[mlen_hi], #0 \n\t" \
"cmpeq %[mlen_lo], #15 \n\t" \
"bhi .LPT0 \n\t" \
"vstm %[s], {d0-d4} \n\t" \
".LPT1: \n\t" \
: [mlen_hi] "+r" (mlen_hi), [mlen_lo] "+r" (mlen_lo), \
[m] "+r" (m), [c] "+r" (c) \
: [s] "r" (&s), [C] "r" (C) \
: "d0", "d1", "d2", "d3", "d4", \
"d10", "d11", "d12", "d13", "d14", "d16", "d17", \
"d20", "d21", "d22", "d23", "d24", "d26", "d27", \
"d31", "memory"); \
mlen = (uint64_t)mlen_hi << 32 | mlen_lo; \
} while (0)
#define CT() \
do { \
uint32_t clen_hi = (uint32_t)(clen >> 32); \
uint32_t clen_lo = (uint32_t)clen; \
__asm__ __volatile__ ( \
".arm \n\t" \
".fpu neon \n\t" \
"cmp %[clen_hi], #0 \n\t" \
"cmpeq %[clen_lo], #15 \n\t" \
"bls .LCT1 \n\t" \
"vldm %[s], {d0-d4} \n\t" \
".LCT0: \n\t" \
"vldm %[c]!, {d26,d27} \n\t" \
"vrev64.8 q8, q0 \n\t" \
"veor q8, q8, q13 \n\t" \
"vrev64.8 q0, q13 \n\t" \
"vstm %[m]!, {d16,d17} \n\t" \
"vmvn d2, d2 \n\t" \
ROUND(32) \
ROUND(40) \
ROUND(48) \
ROUND(56) \
ROUND(64) \
ROUND(72) \
ROUND(80) \
ROUND(88) \
"vmvn d2, d2 \n\t" \
"subs %[clen_lo], %[clen_lo], #16 \n\t" \
"sbc %[clen_hi], %[clen_hi], #0 \n\t" \
"cmp %[clen_hi], #0 \n\t" \
"cmpeq %[clen_lo], #15 \n\t" \
"bhi .LCT0 \n\t" \
"vstm %[s], {d0-d4} \n\t" \
".LCT1: \n\t" \
: [clen_hi] "+r" (clen_hi), [clen_lo] "+r" (clen_lo), \
[m] "+r" (m), [c] "+r" (c) \
: [s] "r" (&s), [C] "r" (C) \
: "d0", "d1", "d2", "d3", "d4", \
"d10", "d11", "d12", "d13", "d14", "d16", "d17", \
"d20", "d21", "d22", "d23", "d24", "d26", "d27", \
"d31", "memory"); \
clen = (uint64_t)clen_hi << 32 | clen_lo; \
} while (0)
#endif /* PERMUTATIONS_H_ */ #endif /* PERMUTATIONS_H_ */
#ifndef ROUND_H_ #ifndef ROUND_H_
#define ROUND_H_ #define ROUND_H_
#include "ascon.h"
#include "printstate.h"
forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) {
*K0 = WORD_T(0);
*K1 = WORD_T(0);
*K2 = WORD_T(0);
}
forceinline void PINIT(state_t* s) {
s->x0 = WORD_T(0);
s->x1 = WORD_T(0);
s->x2 = WORD_T(0);
s->x3 = WORD_T(0);
s->x4 = WORD_T(0);
}
/* clang-format off */ /* clang-format off */
#define ROUND(OFFSET) \ #define ROUND(OFFSET) \
"vldr d31, [%[C], #" #OFFSET "] \n\t" \ "vldr d31, [%[C], #" #OFFSET "] \n\t" \
......
...@@ -3,28 +3,73 @@ ...@@ -3,28 +3,73 @@
#include <stdint.h> #include <stdint.h>
#define WORDTOU64 #include "endian.h"
#define U64TOWORD #include "forceinline.h"
typedef uint64_t word_t; typedef uint64_t word_t;
#define WORD_T
#define UINT64_T
#define U64TOWORD
#define WORDTOU64
/* get byte from Ascon 64-bit word */ /* get byte from Ascon 64-bit word */
#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i)))) #define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i))))
/* set byte in Ascon 64-bit word */ /* set byte in Ascon 64-bit word */
#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i))) #define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i)))
/* set padding byte in Ascon 64-bit word */ forceinline word_t ROR(word_t x, int n) { return x >> n | x << (64 - n); }
#define PAD(i) SETBYTE(0x80, i)
forceinline word_t NOT(word_t a) { return ~a; }
forceinline word_t XOR(word_t a, word_t b) { return a ^ b; }
forceinline word_t AND(word_t a, word_t b) { return a & b; }
forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) {
return lo2hi << 32 | hi2lo >> 32;
}
forceinline int NOTZERO(word_t a, word_t b) {
uint64_t result = a | b;
result |= result >> 32;
result |= result >> 16;
result |= result >> 8;
return ((((int)(result & 0xff) - 1) >> 8) & 1) - 1;
}
forceinline word_t PAD(int i) { return 0x80ull << (56 - 8 * i); }
forceinline word_t CLEAR(word_t w, int n) {
/* undefined for n == 0 */
uint64_t mask = 0x00ffffffffffffffull >> (n * 8 - 8);
return w & mask;
}
forceinline uint64_t MASK(int n) {
/* undefined for n == 0 */
return ~0ull >> (64 - 8 * n);
}
forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = *(uint64_t*)bytes & MASK(n);
return U64BIG(x);
}
forceinline void STORE(uint8_t* bytes, word_t w, int n) {
*(uint64_t*)bytes &= ~MASK(n);
*(uint64_t*)bytes |= U64BIG(w);
}
static inline uint64_t LOADBYTES(const uint8_t* bytes, int n) { forceinline word_t LOADBYTES(const uint8_t* bytes, int n) {
uint64_t x = 0; uint64_t x = 0;
for (int i = 0; i < n; ++i) x |= SETBYTE(bytes[i], i); for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i];
return x; return x;
} }
static inline void STOREBYTES(uint8_t* bytes, uint64_t x, int n) { forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) {
for (int i = 0; i < n; ++i) bytes[i] = GETBYTE(x, i); for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&w)[7 - i];
} }
static inline uint64_t CLEARBYTES(uint64_t x, int n) { static inline uint64_t CLEARBYTES(uint64_t x, int n) {
......
#define CRYPTO_VERSION "1.2.5"
#define CRYPTO_KEYBYTES 16 #define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0 #define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16 #define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16 #define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1 #define CRYPTO_NOOVERLAP 1
#define ASCON_RATE 16 #define ASCON_AEAD_RATE 16
...@@ -3,15 +3,14 @@ ...@@ -3,15 +3,14 @@
#include <stdint.h> #include <stdint.h>
#include "config.h"
#include "word.h" #include "word.h"
typedef struct { typedef struct {
word_t x0, x1, x2, x3, x4; word_t x0, x1, x2, x3, x4;
} state_t; } state_t;
void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k); void ascon_aeadinit(state_t* s, const uint8_t* npub, const uint8_t* k);
void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen); void ascon_adata(state_t* s, const uint8_t* ad, uint64_t adlen);
void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen);
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen);
void ascon_final(state_t* s, const uint8_t* k); void ascon_final(state_t* s, const uint8_t* k);
......
#ifndef CONFIG_H_ #ifndef CONFIG_H_
#define CONFIG_H_ #define CONFIG_H_
/* inline the Ascon mode */ /* inline the ascon mode */
#ifndef ASCON_INLINE_MODE #ifndef ASCON_INLINE_MODE
#define ASCON_INLINE_MODE 1 #define ASCON_INLINE_MODE 1
#endif #endif
/* inline the Ascon permutations */ /* inline all permutations */
#ifndef ASCON_INLINE_PERM #ifndef ASCON_INLINE_PERM
#define ASCON_INLINE_PERM 1 #define ASCON_INLINE_PERM 1
#endif #endif
/* single function for all permutations */ /* unroll permutation loops */
#ifndef ASCON_SINGLE_PERM
#define ASCON_SINGLE_PERM 0
#endif
/* unroll the permutation loops */
#ifndef ASCON_UNROLL_LOOPS #ifndef ASCON_UNROLL_LOOPS
#define ASCON_UNROLL_LOOPS 1 #define ASCON_UNROLL_LOOPS 1
#endif #endif
/* make sure __forceinline is supported */
#ifndef __forceinline
#define __forceinline inline __attribute__((always_inline))
#endif
#endif /* CONFIG_H_ */ #endif /* CONFIG_H_ */
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
/* macros for big endian machines */ /* macros for big endian machines */
#ifndef NDEBUG #ifdef PRAGMA_ENDIAN
#pragma message("Using macros for big endian machines") #pragma message("Using macros for big endian machines")
#endif #endif
#define U64BIG(x) (x) #define U64BIG(x) (x)
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
/* macros for little endian machines */ /* macros for little endian machines */
#ifndef NDEBUG #ifdef PRAGMA_ENDIAN
#pragma message("Using macros for little endian machines") #pragma message("Using macros for little endian machines")
#endif #endif
#define U64BIG(x) \ #define U64BIG(x) \
......
#include "permutations.h" #include "permutations.h"
#include "round.h" #if !ASCON_INLINE_PERM && ASCON_UNROLL_LOOPS
#if ASCON_INLINE_PERM void P12(state_t* s) { P12ROUNDS(s); }
void P8(state_t* s) { P8ROUNDS(s); }
void P6(state_t* s) { P6ROUNDS(s); }
#elif ASCON_SINGLE_PERM
void P(state_t* s, uint8_t rounds) {
printstate(" permutation input", s);
for (int i = START(rounds); i > 0x4a; i -= 0x0f) ROUND(s, i);
}
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */
void P12(state_t* s) {
printstate(" permutation input", s);
P12ROUNDS(s);
}
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16
void P8(state_t* s) {
printstate(" permutation input", s);
P8ROUNDS(s);
}
#endif #endif
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 #if !ASCON_INLINE_PERM && !ASCON_UNROLL_LOOPS
void P6(state_t* s) {
printstate(" permutation input", s); void P(state_t* s, int nr) { PROUNDS(s, nr); }
P6ROUNDS(s);
}
#endif
#endif #endif
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
#include "api.h" #include "api.h"
#include "ascon.h" #include "ascon.h"
#include "config.h"
#include "printstate.h" #include "printstate.h"
#include "round.h" #include "round.h"
...@@ -14,18 +15,29 @@ ...@@ -14,18 +15,29 @@
#define ASCON_128_RATE 8 #define ASCON_128_RATE 8
#define ASCON_128A_RATE 16 #define ASCON_128A_RATE 16
#define ASCON_HASH_RATE 8
#define ASCON_128_PA_ROUNDS 12 #define ASCON_128_PA_ROUNDS 12
#define ASCON_128_PB_ROUNDS 6 #define ASCON_128_PB_ROUNDS 6
#define ASCON_128A_PA_ROUNDS 12
#define ASCON_128A_PB_ROUNDS 8 #define ASCON_128A_PB_ROUNDS 8
#define ASCON_HASH_PA_ROUNDS 12
#define ASCON_HASH_PB_ROUNDS 12
#define ASCON_HASHA_PA_ROUNDS 12
#define ASCON_HASHA_PB_ROUNDS 8
#define ASCON_HASH_BYTES 32 #define ASCON_HASH_BYTES 32
#define ASCON_128_IV WORD_T(0x80400c0600000000) #define ASCON_128_IV WORD_T(0x80400c0600000000ull)
#define ASCON_128A_IV WORD_T(0x80800c0800000000) #define ASCON_128A_IV WORD_T(0x80800c0800000000ull)
#define ASCON_80PQ_IV WORD_T(0xa0400c0600000000) #define ASCON_80PQ_IV WORD_T(0xa0400c0600000000ull)
#define ASCON_HASH_IV WORD_T(0x00400c0000000100) #define ASCON_HASH_IV WORD_T(0x00400c0000000100ull)
#define ASCON_XOF_IV WORD_T(0x00400c0000000000) #define ASCON_HASHA_IV WORD_T(0x00400c0400000100ull)
#define ASCON_XOF_IV WORD_T(0x00400c0000000000ull)
#define ASCON_XOFA_IV WORD_T(0x00400c0400000000ull)
#define ASCON_HASH_IV0 WORD_T(0xee9398aadb67f03dull) #define ASCON_HASH_IV0 WORD_T(0xee9398aadb67f03dull)
#define ASCON_HASH_IV1 WORD_T(0x8bb21831c60f1002ull) #define ASCON_HASH_IV1 WORD_T(0x8bb21831c60f1002ull)
...@@ -33,130 +45,93 @@ ...@@ -33,130 +45,93 @@
#define ASCON_HASH_IV3 WORD_T(0x43189921b8f8e3e8ull) #define ASCON_HASH_IV3 WORD_T(0x43189921b8f8e3e8ull)
#define ASCON_HASH_IV4 WORD_T(0x348fa5c9d525e140ull) #define ASCON_HASH_IV4 WORD_T(0x348fa5c9d525e140ull)
#define ASCON_HASHA_IV0 WORD_T(0x01470194fc6528a6ull)
#define ASCON_HASHA_IV1 WORD_T(0x738ec38ac0adffa7ull)
#define ASCON_HASHA_IV2 WORD_T(0x2ec8e3296c76384cull)
#define ASCON_HASHA_IV3 WORD_T(0xd6f6a54d7f52377dull)
#define ASCON_HASHA_IV4 WORD_T(0xa13c42a223be8d87ull)
#define ASCON_XOF_IV0 WORD_T(0xb57e273b814cd416ull) #define ASCON_XOF_IV0 WORD_T(0xb57e273b814cd416ull)
#define ASCON_XOF_IV1 WORD_T(0x2b51042562ae2420ull) #define ASCON_XOF_IV1 WORD_T(0x2b51042562ae2420ull)
#define ASCON_XOF_IV2 WORD_T(0x66a3a7768ddf2218ull) #define ASCON_XOF_IV2 WORD_T(0x66a3a7768ddf2218ull)
#define ASCON_XOF_IV3 WORD_T(0x5aad0a7a8153650cull) #define ASCON_XOF_IV3 WORD_T(0x5aad0a7a8153650cull)
#define ASCON_XOF_IV4 WORD_T(0x4f3e0e32539493b6ull) #define ASCON_XOF_IV4 WORD_T(0x4f3e0e32539493b6ull)
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 #define ASCON_XOFA_IV0 WORD_T(0x44906568b77b9832ull)
#define IV ASCON_128_IV #define ASCON_XOFA_IV1 WORD_T(0xcd8d6cae53455532ull)
#define PA_ROUNDS 12 #define ASCON_XOFA_IV2 WORD_T(0xf7b5212756422129ull)
#define PB_ROUNDS 6 #define ASCON_XOFA_IV3 WORD_T(0x246885e1de0d225bull)
#define PB P6 #define ASCON_XOFA_IV4 WORD_T(0xa8cb5ce33449973full)
#endif
#if ASCON_RATE == 16
#define IV ASCON_128A_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 8
#define PB P8
#endif
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20
#define IV ASCON_80PQ_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 6
#define PB P6
#endif
#define START(n) ((3 + (n)) << 4 | (12 - (n))) #define START(n) ((3 + (n)) << 4 | (12 - (n)))
#define RC(c) WORD_T(c)
#if ASCON_UNROLL_LOOPS
forceinline void P12ROUNDS(state_t* s) {
__forceinline void P12ROUNDS(state_t* s) { ROUND(s, RC(0xf0));
ROUND(s, 0xf0); ROUND(s, RC(0xe1));
ROUND(s, 0xe1); ROUND(s, RC(0xd2));
ROUND(s, 0xd2); ROUND(s, RC(0xc3));
ROUND(s, 0xc3); ROUND(s, RC(0xb4));
ROUND(s, 0xb4); ROUND(s, RC(0xa5));
ROUND(s, 0xa5); ROUND(s, RC(0x96));
ROUND(s, 0x96); ROUND(s, RC(0x87));
ROUND(s, 0x87); ROUND(s, RC(0x78));
ROUND(s, 0x78); ROUND(s, RC(0x69));
ROUND(s, 0x69); ROUND(s, RC(0x5a));
ROUND(s, 0x5a); ROUND(s, RC(0x4b));
ROUND(s, 0x4b);
}
__forceinline void P8ROUNDS(state_t* s) {
ROUND(s, 0xb4);
ROUND(s, 0xa5);
ROUND(s, 0x96);
ROUND(s, 0x87);
ROUND(s, 0x78);
ROUND(s, 0x69);
ROUND(s, 0x5a);
ROUND(s, 0x4b);
}
__forceinline void P6ROUNDS(state_t* s) {
ROUND(s, 0x96);
ROUND(s, 0x87);
ROUND(s, 0x78);
ROUND(s, 0x69);
ROUND(s, 0x5a);
ROUND(s, 0x4b);
} }
#else /* !ASCON_UNROLL_LOOPS */ forceinline void P8ROUNDS(state_t* s) {
ROUND(s, RC(0xb4));
__forceinline void P12ROUNDS(state_t* s) { ROUND(s, RC(0xa5));
for (int i = START(12); i > 0x4a; i -= 0x0f) ROUND(s, i); ROUND(s, RC(0x96));
ROUND(s, RC(0x87));
ROUND(s, RC(0x78));
ROUND(s, RC(0x69));
ROUND(s, RC(0x5a));
ROUND(s, RC(0x4b));
} }
__forceinline void P8ROUNDS(state_t* s) { forceinline void P6ROUNDS(state_t* s) {
for (int i = START(8); i > 0x4a; i -= 0x0f) ROUND(s, i); ROUND(s, RC(0x96));
ROUND(s, RC(0x87));
ROUND(s, RC(0x78));
ROUND(s, RC(0x69));
ROUND(s, RC(0x5a));
ROUND(s, RC(0x4b));
} }
__forceinline void P6ROUNDS(state_t* s) { forceinline void PROUNDS(state_t* s, int nr) {
for (int i = START(6); i > 0x4a; i -= 0x0f) ROUND(s, i); for (int i = START(nr); i > 0x4a; i -= 0x0f) ROUND(s, RC(i));
} }
#endif #if ASCON_INLINE_PERM && ASCON_UNROLL_LOOPS
#if ASCON_INLINE_PERM
__forceinline void P12(state_t* s) { forceinline void P(state_t* s, int nr) {
printstate(" permutation input", s); if (nr == 12) P12ROUNDS(s);
P12ROUNDS(s); if (nr == 8) P8ROUNDS(s);
if (nr == 6) P6ROUNDS(s);
} }
__forceinline void P8(state_t* s) { #elif !ASCON_INLINE_PERM && ASCON_UNROLL_LOOPS
printstate(" permutation input", s);
P8ROUNDS(s);
}
__forceinline void P6(state_t* s) { void P12(state_t* s);
printstate(" permutation input", s); void P8(state_t* s);
P6ROUNDS(s); void P6(state_t* s);
}
__forceinline void P(state_t* s, int i) { forceinline void P(state_t* s, int nr) {
if (i == 12) P12(s); if (nr == 12) P12(s);
if (i == 8) P8(s); if (nr == 8) P8(s);
if (i == 6) P6(s); if (nr == 6) P6(s);
} }
#elif ASCON_SINGLE_PERM #elif ASCON_INLINE_PERM && !ASCON_UNROLL_LOOPS
#define P12(s) P(s, 12)
#define P8(s) P(s, 8)
#define P6(s) P(s, 6)
void P(state_t* s, uint8_t rounds); forceinline void P(state_t* s, int nr) { PROUNDS(s, nr); }
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ #else /* !ASCON_INLINE_PERM && !ASCON_UNROLL_LOOPS */
void P12(state_t* s); void P(state_t* s, int nr);
void P8(state_t* s);
void P6(state_t* s);
__forceinline void P(state_t* s, int i) {
if (i == 12) P12(s);
if (i == 8) P8(s);
if (i == 6) P6(s);
}
#endif #endif
......
#ifndef PRINTSTATE_H_ #ifndef PRINTSTATE_H_
#define PRINTSTATE_H_ #define PRINTSTATE_H_
#ifdef NDEBUG #ifdef ASCON_PRINTSTATE
#define printword(text, w) #include "ascon.h"
#define printstate(text, s) #include "word.h"
#else void printword(const char* text, const word_t x);
void printstate(const char* text, const state_t* s);
#include <inttypes.h> #else
#include <stdio.h>
#include "ascon.h" #define printword(text, w) \
#include "word.h" do { \
} while (0)
__forceinline void printword(const char* text, const word_t x) { #define printstate(text, s) \
printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); do { \
} } while (0)
__forceinline void printstate(const char* text, const state_t* s) {
printf("%s:\n", text);
printword(" x0", s->x0);
printword(" x1", s->x1);
printword(" x2", s->x2);
printword(" x3", s->x3);
printword(" x4", s->x4);
}
#endif #endif
......
...@@ -4,13 +4,13 @@ ...@@ -4,13 +4,13 @@
#include "ascon.h" #include "ascon.h"
#include "printstate.h" #include "printstate.h"
__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) {
*K0 = WORD_T(0); *K0 = WORD_T(0);
*K1 = WORD_T(0); *K1 = WORD_T(0);
*K2 = WORD_T(0); *K2 = WORD_T(0);
} }
__forceinline void PINIT(state_t* s) { forceinline void PINIT(state_t* s) {
s->x0 = WORD_T(0); s->x0 = WORD_T(0);
s->x1 = WORD_T(0); s->x1 = WORD_T(0);
s->x2 = WORD_T(0); s->x2 = WORD_T(0);
...@@ -18,51 +18,34 @@ __forceinline void PINIT(state_t* s) { ...@@ -18,51 +18,34 @@ __forceinline void PINIT(state_t* s) {
s->x4 = WORD_T(0); s->x4 = WORD_T(0);
} }
__forceinline void ROUND(state_t* s, uint64_t C) { forceinline void ROUND(state_t* s, word_t C) {
state_t t; state_t t;
s->x2 ^= C; /* round constant */
s->x0 ^= s->x4; s->x2 = XOR(s->x2, C);
s->x4 ^= s->x3; /* s-box layer */
s->x2 ^= s->x1; s->x0 = XOR(s->x0, s->x4);
t.x0 = s->x0; s->x4 = XOR(s->x4, s->x3);
t.x4 = s->x4; s->x2 = XOR(s->x2, s->x1);
t.x3 = s->x3; t.x0 = XOR(s->x0, AND(NOT(s->x1), s->x2));
t.x1 = s->x1; t.x2 = XOR(s->x2, AND(NOT(s->x3), s->x4));
t.x2 = s->x2; t.x4 = XOR(s->x4, AND(NOT(s->x0), s->x1));
s->x0 = t.x0 ^ (~t.x1 & t.x2); t.x1 = XOR(s->x1, AND(NOT(s->x2), s->x3));
s->x2 = t.x2 ^ (~t.x3 & t.x4); t.x3 = XOR(s->x3, AND(NOT(s->x4), s->x0));
s->x4 = t.x4 ^ (~t.x0 & t.x1); t.x1 = XOR(t.x1, t.x0);
s->x1 = t.x1 ^ (~t.x2 & t.x3); t.x3 = XOR(t.x3, t.x2);
s->x3 = t.x3 ^ (~t.x4 & t.x0); t.x0 = XOR(t.x0, t.x4);
s->x1 ^= s->x0; /* linear layer */
t.x1 = s->x1; s->x2 = XOR(t.x2, ROR(t.x2, 6 - 1));
s->x1 = ROR64(s->x1, 39); s->x3 = XOR(t.x3, ROR(t.x3, 17 - 10));
s->x3 ^= s->x2; s->x4 = XOR(t.x4, ROR(t.x4, 41 - 7));
t.x2 = s->x2; s->x0 = XOR(t.x0, ROR(t.x0, 28 - 19));
s->x2 = ROR64(s->x2, 1); s->x1 = XOR(t.x1, ROR(t.x1, 61 - 39));
t.x4 = s->x4; s->x2 = XOR(t.x2, ROR(s->x2, 1));
t.x2 ^= s->x2; s->x3 = XOR(t.x3, ROR(s->x3, 10));
s->x2 = ROR64(s->x2, 6 - 1); s->x4 = XOR(t.x4, ROR(s->x4, 7));
t.x3 = s->x3; s->x0 = XOR(t.x0, ROR(s->x0, 19));
t.x1 ^= s->x1; s->x1 = XOR(t.x1, ROR(s->x1, 39));
s->x3 = ROR64(s->x3, 10); s->x2 = NOT(s->x2);
s->x0 ^= s->x4;
s->x4 = ROR64(s->x4, 7);
t.x3 ^= s->x3;
s->x2 ^= t.x2;
s->x1 = ROR64(s->x1, 61 - 39);
t.x0 = s->x0;
s->x2 = ~s->x2;
s->x3 = ROR64(s->x3, 17 - 10);
t.x4 ^= s->x4;
s->x4 = ROR64(s->x4, 41 - 7);
s->x3 ^= t.x3;
s->x1 ^= t.x1;
s->x0 = ROR64(s->x0, 19);
s->x4 ^= t.x4;
t.x0 ^= s->x0;
s->x0 = ROR64(s->x0, 28 - 19);
s->x0 ^= t.x0;
printstate(" round output", s); printstate(" round output", s);
} }
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
#include <stdint.h> #include <stdint.h>
#include "endian.h" #include "endian.h"
#include "forceinline.h"
typedef uint64_t word_t; typedef uint64_t word_t;
...@@ -12,69 +13,57 @@ typedef uint64_t word_t; ...@@ -12,69 +13,57 @@ typedef uint64_t word_t;
#define U64TOWORD #define U64TOWORD
#define WORDTOU64 #define WORDTOU64
__forceinline word_t ROR64(word_t x, int n) { return x >> n | x << (64 - n); } forceinline word_t ROR(word_t x, int n) { return x >> n | x << (64 - n); }
__forceinline word_t NOT(word_t a) { return ~a; } forceinline word_t NOT(word_t a) { return ~a; }
__forceinline word_t XOR(word_t a, word_t b) { return a ^ b; } forceinline word_t XOR(word_t a, word_t b) { return a ^ b; }
__forceinline word_t AND(word_t a, word_t b) { return a & b; } forceinline word_t AND(word_t a, word_t b) { return a & b; }
__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) {
return lo2hi << 32 | hi2lo >> 32; return lo2hi << 32 | hi2lo >> 32;
} }
__forceinline uint8_t NOTZERO(word_t a, word_t b) { forceinline int NOTZERO(word_t a, word_t b) {
uint64_t result = a | b; uint64_t result = a | b;
result |= result >> 32; result |= result >> 32;
result |= result >> 16; result |= result >> 16;
result |= result >> 8; result |= result >> 8;
return (uint8_t)result; return ((((int)(result & 0xff) - 1) >> 8) & 1) - 1;
} }
__forceinline word_t PAD(int i) { return WORD_T(0x80ull << (56 - 8 * i)); } forceinline word_t PAD(int i) { return 0x80ull << (56 - 8 * i); }
__forceinline word_t CLEAR(word_t w, int n) { forceinline word_t CLEAR(word_t w, int n) {
/* undefined for n == 0 */ /* undefined for n == 0 */
uint64_t mask = 0x00ffffffffffffffull >> (n * 8 - 8); uint64_t mask = 0x00ffffffffffffffull >> (n * 8 - 8);
return AND(w, WORD_T(mask)); return w & mask;
} }
__forceinline uint64_t MASK(int n) { forceinline uint64_t MASK(int n) {
/* undefined for n == 0 */ /* undefined for n == 0 */
return ~0ull >> (64 - 8 * n); return ~0ull >> (64 - 8 * n);
} }
__forceinline word_t LOAD64(const uint8_t* bytes) { forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = *(uint64_t*)bytes;
return U64TOWORD(U64BIG(x));
}
__forceinline void STORE64(uint8_t* bytes, word_t w) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes = U64BIG(x);
}
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = *(uint64_t*)bytes & MASK(n); uint64_t x = *(uint64_t*)bytes & MASK(n);
return U64TOWORD(U64BIG(x)); return U64BIG(x);
} }
__forceinline void STORE(uint8_t* bytes, word_t w, int n) { forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes &= ~MASK(n); *(uint64_t*)bytes &= ~MASK(n);
*(uint64_t*)bytes |= U64BIG(x); *(uint64_t*)bytes |= U64BIG(w);
} }
__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) { forceinline word_t LOADBYTES(const uint8_t* bytes, int n) {
uint64_t x = 0; uint64_t x = 0;
for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i]; for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i];
return U64TOWORD(x); return x;
} }
__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) { forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w); for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&w)[7 - i];
for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i];
} }
#endif /* WORD_H_ */ #endif /* WORD_H_ */
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment