Commit c1af5db0 by Enrico Pozzobon

Finalists

parent 9c6d9e4a

Too many changes to show.

To preserve performance only 1000 of 1000+ files are displayed.

......@@ -22,15 +22,19 @@
forceinline void ascon_loadkey(word_t* K0, word_t* K1, word_t* K2,
const uint8_t* k) {
KINIT(K0, K1, K2);
if (CRYPTO_KEYBYTES == 16) {
*K1 = XOR(*K1, LOAD(k, 8));
*K2 = XOR(*K2, LOAD(k + 8, 8));
}
if (CRYPTO_KEYBYTES == 20) {
*K0 = XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4)));
k += 4;
*K0 = XOR(*K0, KEYROT(WORD_T(0), LOADBYTES(k, 4)));
*K1 = XOR(*K1, LOADBYTES(k + 4, 8));
*K2 = XOR(*K2, LOADBYTES(k + 12, 8));
}
*K1 = XOR(*K1, LOAD(k, 8));
*K2 = XOR(*K2, LOAD(k + 8, 8));
}
forceinline void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) {
forceinline void ascon_aeadinit(state_t* s, const uint8_t* npub,
const uint8_t* k) {
/* load nonce */
word_t N0 = LOAD(npub, 8);
word_t N1 = LOAD(npub + 8, 8);
......@@ -39,9 +43,9 @@ forceinline void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) {
ascon_loadkey(&K0, &K1, &K2, k);
/* initialize */
PINIT(s);
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8)
if (CRYPTO_KEYBYTES == 16 && ASCON_AEAD_RATE == 8)
s->x0 = XOR(s->x0, ASCON_128_IV);
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16)
if (CRYPTO_KEYBYTES == 16 && ASCON_AEAD_RATE == 16)
s->x0 = XOR(s->x0, ASCON_128A_IV);
if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, ASCON_80PQ_IV);
if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, K0);
......@@ -58,23 +62,23 @@ forceinline void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) {
forceinline void ascon_adata(state_t* s, const uint8_t* ad, uint64_t adlen) {
const __m512i u64big = AVX512_SHUFFLE_U64BIG;
const int mask = (ASCON_RATE == 8) ? 0xff : 0xffff;
const int nr = (ASCON_RATE == 8) ? 6 : 8;
const int mask = (ASCON_AEAD_RATE == 8) ? 0xff : 0xffff;
const int nr = (ASCON_AEAD_RATE == 8) ? 6 : 8;
state_t r = *s, t;
if (adlen) {
/* full associated data blocks */
while (adlen >= ASCON_RATE) {
while (adlen >= ASCON_AEAD_RATE) {
t.z = _mm512_maskz_loadu_epi8(mask, ad);
t.z = _mm512_maskz_shuffle_epi8(mask, t.z, u64big);
r.z = _mm512_xor_epi64(r.z, t.z);
P(&r, nr);
ad += ASCON_RATE;
adlen -= ASCON_RATE;
ad += ASCON_AEAD_RATE;
adlen -= ASCON_AEAD_RATE;
}
*s = r;
/* final associated data block */
word_t* px = &s->x0;
if (ASCON_RATE == 16 && adlen >= 8) {
if (ASCON_AEAD_RATE == 16 && adlen >= 8) {
s->x0 = XOR(s->x0, LOAD(ad, 8));
px = &s->x1;
ad += 8;
......@@ -92,25 +96,25 @@ forceinline void ascon_adata(state_t* s, const uint8_t* ad, uint64_t adlen) {
forceinline void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m,
uint64_t mlen) {
const __m512i u64big = AVX512_SHUFFLE_U64BIG;
const int mask = (ASCON_RATE == 8) ? 0xff : 0xffff;
const int nr = (ASCON_RATE == 8) ? 6 : 8;
const int mask = (ASCON_AEAD_RATE == 8) ? 0xff : 0xffff;
const int nr = (ASCON_AEAD_RATE == 8) ? 6 : 8;
state_t r = *s, t;
/* full plaintext blocks */
while (mlen >= ASCON_RATE) {
while (mlen >= ASCON_AEAD_RATE) {
t.z = _mm512_maskz_loadu_epi8(mask, m);
t.z = _mm512_maskz_shuffle_epi8(mask, t.z, u64big);
r.z = _mm512_xor_epi64(r.z, t.z);
t.z = _mm512_maskz_shuffle_epi8(mask, r.z, u64big);
_mm512_mask_storeu_epi8(c, mask, t.z);
P(&r, nr);
m += ASCON_RATE;
c += ASCON_RATE;
mlen -= ASCON_RATE;
m += ASCON_AEAD_RATE;
c += ASCON_AEAD_RATE;
mlen -= ASCON_AEAD_RATE;
}
*s = r;
/* final plaintext block */
word_t* px = &s->x0;
if (ASCON_RATE == 16 && mlen >= 8) {
if (ASCON_AEAD_RATE == 16 && mlen >= 8) {
s->x0 = XOR(s->x0, LOAD(m, 8));
STORE(c, s->x0, 8);
px = &s->x1;
......@@ -129,11 +133,11 @@ forceinline void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m,
forceinline void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c,
uint64_t clen) {
const __m512i u64big = AVX512_SHUFFLE_U64BIG;
const int mask = (ASCON_RATE == 8) ? 0xff : 0xffff;
const int nr = (ASCON_RATE == 8) ? 6 : 8;
const int mask = (ASCON_AEAD_RATE == 8) ? 0xff : 0xffff;
const int nr = (ASCON_AEAD_RATE == 8) ? 6 : 8;
state_t r = *s, t, u;
/* full ciphertext blocks */
while (clen >= ASCON_RATE) {
while (clen >= ASCON_AEAD_RATE) {
t.z = _mm512_maskz_loadu_epi8(mask, c);
t.z = _mm512_maskz_shuffle_epi8(mask, t.z, u64big);
r.z = _mm512_xor_epi64(r.z, t.z);
......@@ -141,14 +145,14 @@ forceinline void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c,
r.z = _mm512_mask_blend_epi8(mask, r.z, t.z);
_mm512_mask_storeu_epi8(m, mask, u.z);
P(&r, nr);
m += ASCON_RATE;
c += ASCON_RATE;
clen -= ASCON_RATE;
m += ASCON_AEAD_RATE;
c += ASCON_AEAD_RATE;
clen -= ASCON_AEAD_RATE;
}
*s = r;
/* final ciphertext block */
word_t* px = &s->x0;
if (ASCON_RATE == 16 && clen >= 8) {
if (ASCON_AEAD_RATE == 16 && clen >= 8) {
word_t cx = LOAD(c, 8);
s->x0 = XOR(s->x0, cx);
STORE(m, s->x0, 8);
......@@ -174,11 +178,11 @@ forceinline void ascon_final(state_t* s, const uint8_t* k) {
word_t K0, K1, K2;
ascon_loadkey(&K0, &K1, &K2, k);
/* finalize */
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) {
if (CRYPTO_KEYBYTES == 16 && ASCON_AEAD_RATE == 8) {
s->x1 = XOR(s->x1, K1);
s->x2 = XOR(s->x2, K2);
}
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) {
if (CRYPTO_KEYBYTES == 16 && ASCON_AEAD_RATE == 16) {
s->x2 = XOR(s->x2, K1);
s->x3 = XOR(s->x3, K2);
}
......@@ -202,7 +206,7 @@ int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen,
(void)nsec;
*clen = mlen + CRYPTO_ABYTES;
/* perform ascon computation */
ascon_init(&s, npub, k);
ascon_aeadinit(&s, npub, k);
ascon_adata(&s, ad, adlen);
ascon_encrypt(&s, c, m, mlen);
ascon_final(&s, k);
......@@ -222,7 +226,7 @@ int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen,
if (clen < CRYPTO_ABYTES) return -1;
*mlen = clen = clen - CRYPTO_ABYTES;
/* perform ascon computation */
ascon_init(&s, npub, k);
ascon_aeadinit(&s, npub, k);
ascon_adata(&s, ad, adlen);
ascon_decrypt(&s, m, c, clen);
ascon_final(&s, k);
......
#define CRYPTO_VERSION "1.2.4"
#define CRYPTO_VERSION "1.2.5"
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#define ASCON_RATE 16
#define ASCON_AEAD_RATE 16
......@@ -13,7 +13,7 @@ typedef union {
};
} state_t;
void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k);
void ascon_aeadinit(state_t* s, const uint8_t* npub, const uint8_t* k);
void ascon_adata(state_t* s, const uint8_t* ad, uint64_t adlen);
void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen);
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen);
......
......@@ -15,18 +15,29 @@
#define ASCON_128_RATE 8
#define ASCON_128A_RATE 16
#define ASCON_HASH_RATE 8
#define ASCON_128_PA_ROUNDS 12
#define ASCON_128_PB_ROUNDS 6
#define ASCON_128A_PA_ROUNDS 12
#define ASCON_128A_PB_ROUNDS 8
#define ASCON_HASH_PA_ROUNDS 12
#define ASCON_HASH_PB_ROUNDS 12
#define ASCON_HASHA_PA_ROUNDS 12
#define ASCON_HASHA_PB_ROUNDS 8
#define ASCON_HASH_BYTES 32
#define ASCON_128_IV WORD_T(0x80400c0600000000)
#define ASCON_128A_IV WORD_T(0x80800c0800000000)
#define ASCON_80PQ_IV WORD_T(0xa0400c0600000000)
#define ASCON_HASH_IV WORD_T(0x00400c0000000100)
#define ASCON_XOF_IV WORD_T(0x00400c0000000000)
#define ASCON_128_IV WORD_T(0x80400c0600000000ull)
#define ASCON_128A_IV WORD_T(0x80800c0800000000ull)
#define ASCON_80PQ_IV WORD_T(0xa0400c0600000000ull)
#define ASCON_HASH_IV WORD_T(0x00400c0000000100ull)
#define ASCON_HASHA_IV WORD_T(0x00400c0400000100ull)
#define ASCON_XOF_IV WORD_T(0x00400c0000000000ull)
#define ASCON_XOFA_IV WORD_T(0x00400c0400000000ull)
#define ASCON_HASH_IV0 WORD_T(0xee9398aadb67f03dull)
#define ASCON_HASH_IV1 WORD_T(0x8bb21831c60f1002ull)
......@@ -34,12 +45,24 @@
#define ASCON_HASH_IV3 WORD_T(0x43189921b8f8e3e8ull)
#define ASCON_HASH_IV4 WORD_T(0x348fa5c9d525e140ull)
#define ASCON_HASHA_IV0 WORD_T(0x01470194fc6528a6ull)
#define ASCON_HASHA_IV1 WORD_T(0x738ec38ac0adffa7ull)
#define ASCON_HASHA_IV2 WORD_T(0x2ec8e3296c76384cull)
#define ASCON_HASHA_IV3 WORD_T(0xd6f6a54d7f52377dull)
#define ASCON_HASHA_IV4 WORD_T(0xa13c42a223be8d87ull)
#define ASCON_XOF_IV0 WORD_T(0xb57e273b814cd416ull)
#define ASCON_XOF_IV1 WORD_T(0x2b51042562ae2420ull)
#define ASCON_XOF_IV2 WORD_T(0x66a3a7768ddf2218ull)
#define ASCON_XOF_IV3 WORD_T(0x5aad0a7a8153650cull)
#define ASCON_XOF_IV4 WORD_T(0x4f3e0e32539493b6ull)
#define ASCON_XOFA_IV0 WORD_T(0x44906568b77b9832ull)
#define ASCON_XOFA_IV1 WORD_T(0xcd8d6cae53455532ull)
#define ASCON_XOFA_IV2 WORD_T(0xf7b5212756422129ull)
#define ASCON_XOFA_IV3 WORD_T(0x246885e1de0d225bull)
#define ASCON_XOFA_IV4 WORD_T(0xa8cb5ce33449973full)
#define START(n) ((3 + (n)) << 4 | (12 - (n)))
#define RC(c) WORD_T(c)
......
#define CRYPTO_VERSION "1.2.5"
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#define ASCON_RATE 16
#define ASCON_AEAD_RATE 16
......@@ -3,15 +3,14 @@
#include <stdint.h>
#include "config.h"
#include "word.h"
typedef struct {
word_t x0, x1, x2, x3, x4;
} state_t;
void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k);
void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen);
void ascon_aeadinit(state_t* s, const uint8_t* npub, const uint8_t* k);
void ascon_adata(state_t* s, const uint8_t* ad, uint64_t adlen);
void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen);
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen);
void ascon_final(state_t* s, const uint8_t* k);
......
#ifndef CONFIG_H_
#define CONFIG_H_
/* inline the Ascon mode */
/* inline the ascon mode */
#ifndef ASCON_INLINE_MODE
#define ASCON_INLINE_MODE 1
#endif
/* inline the Ascon permutations */
/* inline all permutations */
#ifndef ASCON_INLINE_PERM
#define ASCON_INLINE_PERM 0
#endif
/* single function for all permutations */
#ifndef ASCON_SINGLE_PERM
#define ASCON_SINGLE_PERM 0
#endif
/* unroll the permutation loops */
/* unroll permutation loops */
#ifndef ASCON_UNROLL_LOOPS
#define ASCON_UNROLL_LOOPS 0
#endif
/* make sure __forceinline is supported */
#ifndef __forceinline
#define __forceinline inline __attribute__((always_inline))
#define ASCON_UNROLL_LOOPS 1
#endif
#endif /* CONFIG_H_ */
......@@ -4,7 +4,7 @@
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
/* macros for big endian machines */
#ifndef NDEBUG
#ifdef PRAGMA_ENDIAN
#pragma message("Using macros for big endian machines")
#endif
#define U64BIG(x) (x)
......@@ -15,7 +15,7 @@
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
/* macros for little endian machines */
#ifndef NDEBUG
#ifdef PRAGMA_ENDIAN
#pragma message("Using macros for little endian machines")
#endif
#define U64BIG(x) \
......
......@@ -3,7 +3,9 @@
#include <stdint.h>
__forceinline uint32_t deinterleave_uint32(uint32_t x) {
#include "forceinline.h"
forceinline uint32_t deinterleave_uint32(uint32_t x) {
uint32_t t;
t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1);
t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2);
......@@ -12,7 +14,7 @@ __forceinline uint32_t deinterleave_uint32(uint32_t x) {
return x;
}
__forceinline uint32_t interleave_uint32(uint32_t x) {
forceinline uint32_t interleave_uint32(uint32_t x) {
uint32_t t;
t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8);
t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4);
......@@ -22,7 +24,7 @@ __forceinline uint32_t interleave_uint32(uint32_t x) {
}
/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
__forceinline uint64_t deinterleave32(uint64_t in) {
forceinline uint64_t deinterleave32(uint64_t in) {
uint32_t hi = in >> 32;
uint32_t lo = in;
uint32_t r0, r1;
......@@ -34,7 +36,7 @@ __forceinline uint64_t deinterleave32(uint64_t in) {
}
/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
__forceinline uint64_t interleave32(uint64_t in) {
forceinline uint64_t interleave32(uint64_t in) {
uint32_t r0 = in;
uint32_t r1 = in >> 32;
uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16);
......
#include "permutations.h"
#include "round.h"
#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM
#if !ASCON_UNROLL_LOOPS
const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9},
{0x6, 0xc}, {0x3, 0xc}, {0x6, 0x9}, {0x3, 0x9},
......@@ -10,35 +8,16 @@ const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9},
#endif
#if ASCON_INLINE_PERM
#elif ASCON_SINGLE_PERM
void P(state_t* s, uint8_t rounds) {
printstate(" permutation input", s);
for (int i = START(rounds); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
}
#if !ASCON_INLINE_PERM && ASCON_UNROLL_LOOPS
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */
/* Out-of-line entry point for the unrolled 12-round permutation. */
void P12(state_t* s) {
  P12ROUNDS(s);
}

/* Out-of-line entry point for the unrolled 8-round permutation. */
void P8(state_t* s) {
  P8ROUNDS(s);
}

/* Out-of-line entry point for the unrolled 6-round permutation. */
void P6(state_t* s) {
  P6ROUNDS(s);
}
void P12(state_t* s) {
printstate(" permutation input", s);
P12ROUNDS(s);
}
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16
void P8(state_t* s) {
printstate(" permutation input", s);
P8ROUNDS(s);
}
#endif
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8
void P6(state_t* s) {
printstate(" permutation input", s);
P6ROUNDS(s);
}
#endif
#if !ASCON_INLINE_PERM && !ASCON_UNROLL_LOOPS
/* Out-of-line permutation with a run-time round count; forwards to the
   rolled-loop implementation PROUNDS. */
void P(state_t* s, int nr) {
  PROUNDS(s, nr);
}
#endif
......@@ -5,6 +5,7 @@
#include "api.h"
#include "ascon.h"
#include "config.h"
#include "printstate.h"
#include "round.h"
......@@ -14,154 +15,124 @@
#define ASCON_128_RATE 8
#define ASCON_128A_RATE 16
#define ASCON_HASH_RATE 8
#define ASCON_128_PA_ROUNDS 12
#define ASCON_128_PB_ROUNDS 6
#define ASCON_128A_PA_ROUNDS 12
#define ASCON_128A_PB_ROUNDS 8
#define ASCON_HASH_BYTES 32
#define ASCON_HASH_PA_ROUNDS 12
#define ASCON_HASH_PB_ROUNDS 12
#define ASCON_128_IV WORD_T(0x8021000008220000)
#define ASCON_128A_IV WORD_T(0x8822000000200000)
#define ASCON_80PQ_IV WORD_T(0xc021000008220000)
#define ASCON_HASH_IV WORD_T(0x0020000008020010)
#define ASCON_XOF_IV WORD_T(0x0020000008020000)
#define ASCON_HASH_IV0 WORD_T(0xf9afb5c6a540dbc7)
#define ASCON_HASH_IV1 WORD_T(0xbd2493011445a340)
#define ASCON_HASH_IV2 WORD_T(0xcb9ba8b5604d4fc8)
#define ASCON_HASH_IV3 WORD_T(0x12a4eede94514c98)
#define ASCON_HASH_IV4 WORD_T(0x4bca84c06339f398)
#define ASCON_XOF_IV0 WORD_T(0xc75782817e351ae6)
#define ASCON_XOF_IV1 WORD_T(0x70045f441d238220)
#define ASCON_XOF_IV2 WORD_T(0x5dd5ab52a13e3f04)
#define ASCON_XOF_IV3 WORD_T(0x3e378142c30c1db2)
#define ASCON_XOF_IV4 WORD_T(0x3735189db624d656)
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16
#define IV ASCON_128_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 6
#define PB P6
#endif
#define ASCON_HASHA_PA_ROUNDS 12
#define ASCON_HASHA_PB_ROUNDS 8
#if ASCON_RATE == 16
#define IV ASCON_128A_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 8
#define PB P8
#endif
#define ASCON_HASH_BYTES 32
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20
#define IV ASCON_80PQ_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 6
#define PB P6
#endif
#define ASCON_128_IV WORD_T(0x8021000008220000ull)
#define ASCON_128A_IV WORD_T(0x8822000000200000ull)
#define ASCON_80PQ_IV WORD_T(0xc021000008220000ull)
#define ASCON_HASH_IV WORD_T(0x0020000008020010ull)
#define ASCON_XOF_IV WORD_T(0x0020000008020000ull)
#define ASCON_HASH_IV0 WORD_T(0xf9afb5c6a540dbc7ull)
#define ASCON_HASH_IV1 WORD_T(0xbd2493011445a340ull)
#define ASCON_HASH_IV2 WORD_T(0xcb9ba8b5604d4fc8ull)
#define ASCON_HASH_IV3 WORD_T(0x12a4eede94514c98ull)
#define ASCON_HASH_IV4 WORD_T(0x4bca84c06339f398ull)
#define ASCON_HASHA_IV0 WORD_T(0x0108e46d1b16eb02ull)
#define ASCON_HASHA_IV1 WORD_T(0x5b9b8efdd29083f3ull)
#define ASCON_HASHA_IV2 WORD_T(0x7ad665622891ae4aull)
#define ASCON_HASHA_IV3 WORD_T(0x9dc27156ee3bfc7full)
#define ASCON_HASHA_IV4 WORD_T(0xc61d5fa916801633ull)
#define ASCON_XOF_IV0 WORD_T(0xc75782817e351ae6ull)
#define ASCON_XOF_IV1 WORD_T(0x70045f441d238220ull)
#define ASCON_XOF_IV2 WORD_T(0x5dd5ab52a13e3f04ull)
#define ASCON_XOF_IV3 WORD_T(0x3e378142c30c1db2ull)
#define ASCON_XOF_IV4 WORD_T(0x3735189db624d656ull)
#define ASCON_XOFA_IV0 WORD_T(0x0846d7a5a4b87d44ull)
#define ASCON_XOFA_IV1 WORD_T(0xaa6f1005b3a2dbf4ull)
#define ASCON_XOFA_IV2 WORD_T(0xdc451146f713e811ull)
#define ASCON_XOFA_IV3 WORD_T(0x468cb2532839e30dull)
#define ASCON_XOFA_IV4 WORD_T(0xeb2d429709e96977ull)
/* Index of the first entry used for an n-round permutation when iterating a
   12-entry round-constant table (12 - n). The argument is parenthesized so
   that compound expressions such as START(a + b) expand correctly. */
#define START(n) (12 - (n))
#if ASCON_UNROLL_LOOPS
__forceinline void P12ROUNDS(state_t* s) {
ROUND(s, 0xc, 0xc);
ROUND(s, 0x9, 0xc);
ROUND(s, 0xc, 0x9);
ROUND(s, 0x9, 0x9);
ROUND(s, 0x6, 0xc);
ROUND(s, 0x3, 0xc);
ROUND(s, 0x6, 0x9);
ROUND(s, 0x3, 0x9);
ROUND(s, 0xc, 0x6);
ROUND(s, 0x9, 0x6);
ROUND(s, 0xc, 0x3);
ROUND(s, 0x9, 0x3);
/* Build a round-constant word from two halves: o is placed in the upper
   32 bits and e in the lower 32 bits of the value handed to WORD_T. Both
   arguments are parenthesized so that compound expressions keep their
   intended precedence inside the shift/or. */
#define RC(e, o) WORD_T((uint64_t)(o) << 32 | (e))
/* Fully unrolled 12-round permutation: applies ROUND to s twelve times, once
   per constant pair below, in the order written. Each pair is packed into a
   single word by RC before being passed to ROUND. */
forceinline void P12ROUNDS(state_t* s) {
ROUND(s, RC(0xc, 0xc));
ROUND(s, RC(0x9, 0xc));
ROUND(s, RC(0xc, 0x9));
ROUND(s, RC(0x9, 0x9));
ROUND(s, RC(0x6, 0xc));
ROUND(s, RC(0x3, 0xc));
ROUND(s, RC(0x6, 0x9));
ROUND(s, RC(0x3, 0x9));
ROUND(s, RC(0xc, 0x6));
ROUND(s, RC(0x9, 0x6));
ROUND(s, RC(0xc, 0x3));
ROUND(s, RC(0x9, 0x3));
}
__forceinline void P8ROUNDS(state_t* s) {
ROUND(s, 0x6, 0xc);
ROUND(s, 0x3, 0xc);
ROUND(s, 0x6, 0x9);
ROUND(s, 0x3, 0x9);
ROUND(s, 0xc, 0x6);
ROUND(s, 0x9, 0x6);
ROUND(s, 0xc, 0x3);
ROUND(s, 0x9, 0x3);
/* Fully unrolled 8-round permutation: applies ROUND to s eight times. The
   constant pairs are the same as the last eight pairs used by P12ROUNDS,
   in the same order. */
forceinline void P8ROUNDS(state_t* s) {
ROUND(s, RC(0x6, 0xc));
ROUND(s, RC(0x3, 0xc));
ROUND(s, RC(0x6, 0x9));
ROUND(s, RC(0x3, 0x9));
ROUND(s, RC(0xc, 0x6));
ROUND(s, RC(0x9, 0x6));
ROUND(s, RC(0xc, 0x3));
ROUND(s, RC(0x9, 0x3));
}
__forceinline void P6ROUNDS(state_t* s) {
ROUND(s, 0x6, 0x9);
ROUND(s, 0x3, 0x9);
ROUND(s, 0xc, 0x6);
ROUND(s, 0x9, 0x6);
ROUND(s, 0xc, 0x3);
ROUND(s, 0x9, 0x3);
/* Fully unrolled 6-round permutation: applies ROUND to s six times. The
   constant pairs are the same as the last six pairs used by P12ROUNDS,
   in the same order. */
forceinline void P6ROUNDS(state_t* s) {
ROUND(s, RC(0x6, 0x9));
ROUND(s, RC(0x3, 0x9));
ROUND(s, RC(0xc, 0x6));
ROUND(s, RC(0x9, 0x6));
ROUND(s, RC(0xc, 0x3));
ROUND(s, RC(0x9, 0x3));
}
#else /* !ASCON_UNROLL_LOOPS */
extern const uint8_t constants[][2];
__forceinline void P12ROUNDS(state_t* s) {
for (int i = START(12); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
/* Rolled-loop permutation: runs ROUND on s for table entries START(nr)
   through 11, taking each round's constant pair from the external
   `constants` table. */
forceinline void PROUNDS(state_t* s, int nr) {
  int round = START(nr);
  while (round < 12) {
    ROUND(s, RC(constants[round][0], constants[round][1]));
    ++round;
  }
}
__forceinline void P8ROUNDS(state_t* s) {
for (int i = START(8); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
}
#if ASCON_INLINE_PERM && ASCON_UNROLL_LOOPS
__forceinline void P6ROUNDS(state_t* s) {
for (int i = START(6); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
/* Inline dispatcher: selects the unrolled permutation that matches nr.
   Any round count other than 12, 8 or 6 leaves s untouched. */
forceinline void P(state_t* s, int nr) {
  switch (nr) {
    case 12:
      P12ROUNDS(s);
      break;
    case 8:
      P8ROUNDS(s);
      break;
    case 6:
      P6ROUNDS(s);
      break;
    default:
      break;
  }
}
#endif
#if ASCON_INLINE_PERM
__forceinline void P12(state_t* s) {
printstate(" permutation input", s);
P12ROUNDS(s);
}
#elif !ASCON_INLINE_PERM && ASCON_UNROLL_LOOPS
__forceinline void P8(state_t* s) {
printstate(" permutation input", s);
P8ROUNDS(s);
}
__forceinline void P6(state_t* s) {
printstate(" permutation input", s);
P6ROUNDS(s);
}
void P12(state_t* s);
void P8(state_t* s);
void P6(state_t* s);
__forceinline void P(state_t* s, int i) {
if (i == 12) P12(s);
if (i == 8) P8(s);
if (i == 6) P6(s);
/* Inline dispatcher onto the out-of-line permutations P12/P8/P6.
   Any round count other than 12, 8 or 6 leaves s untouched. */
forceinline void P(state_t* s, int nr) {
  switch (nr) {
    case 12:
      P12(s);
      break;
    case 8:
      P8(s);
      break;
    case 6:
      P6(s);
      break;
    default:
      break;
  }
}
#elif ASCON_SINGLE_PERM
#elif ASCON_INLINE_PERM && !ASCON_UNROLL_LOOPS
#define P12(s) P(s, 12)
#define P8(s) P(s, 8)
#define P6(s) P(s, 6)
/* Inline permutation with a run-time round count; forwards to the
   rolled-loop implementation PROUNDS. */
forceinline void P(state_t* s, int nr) {
  PROUNDS(s, nr);
}
void P(state_t* s, uint8_t rounds);
#else /* !ASCON_INLINE_PERM && !ASCON_UNROLL_LOOPS */
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */
void P12(state_t* s);
void P8(state_t* s);
void P6(state_t* s);
/* Inline dispatcher onto the out-of-line permutations P12/P8/P6, selected by
   the round count i. Any other value of i leaves s untouched. */
__forceinline void P(state_t* s, int i) {
  switch (i) {
    case 12:
      P12(s);
      break;
    case 8:
      P8(s);
      break;
    case 6:
      P6(s);
      break;
    default:
      break;
  }
}
void P(state_t* s, int nr);
#endif
......
#ifndef PRINTSTATE_H_
#define PRINTSTATE_H_
#ifdef NDEBUG
#ifdef ASCON_PRINTSTATE
#define printword(text, w)
#define printstate(text, s)
#include "ascon.h"
#include "word.h"
#else
void printword(const char* text, const word_t x);
void printstate(const char* text, const state_t* s);
#include <inttypes.h>
#include <stdio.h>
#else
#include "ascon.h"
#include "word.h"
#define printword(text, w) \
do { \
} while (0)
/* Print one state word as "<text>=<16 hex digits>" followed by a newline.
   The word is converted with WORDTOU64 before printing. */
__forceinline void printword(const char* text, const word_t x) {
  const uint64_t value = WORDTOU64(x);
  printf("%s=%016" PRIx64 "\n", text, value);
}
__forceinline void printstate(const char* text, const state_t* s) {
printf("%s:\n", text);
printword(" x0", s->x0);
printword(" x1", s->x1);
printword(" x2", s->x2);
printword(" x3", s->x3);
printword(" x4", s->x4);
}
#define printstate(text, s) \
do { \
} while (0)
#endif
......
......@@ -4,13 +4,13 @@
#include "ascon.h"
#include "printstate.h"
__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) {
/* Reset the three key words to the all-zero word. */
forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) {
  const word_t zero = WORD_T(0);
  *K0 = zero;
  *K1 = zero;
  *K2 = zero;
}
__forceinline void PINIT(state_t* s) {
forceinline void PINIT(state_t* s) {
s->x0 = WORD_T(0);
s->x1 = WORD_T(0);
s->x2 = WORD_T(0);
......@@ -18,67 +18,34 @@ __forceinline void PINIT(state_t* s) {
s->x4 = WORD_T(0);
}
__forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) {
forceinline void ROUND(state_t* s, word_t C) {
state_t t;
/* round constant */
s->x2.e ^= C_e;
s->x2.o ^= C_o;
s->x2 = XOR(s->x2, C);
/* s-box layer */
s->x0.e ^= s->x4.e;
s->x0.o ^= s->x4.o;
s->x4.e ^= s->x3.e;
s->x4.o ^= s->x3.o;
s->x2.e ^= s->x1.e;
s->x2.o ^= s->x1.o;
t.x0.e = s->x0.e;
t.x0.o = s->x0.o;
t.x4.e = s->x4.e;
t.x4.o = s->x4.o;
t.x3.e = s->x3.e;
t.x3.o = s->x3.o;
t.x1.e = s->x1.e;
t.x1.o = s->x1.o;
t.x2.e = s->x2.e;
t.x2.o = s->x2.o;
s->x0.e = t.x0.e ^ (~t.x1.e & t.x2.e);
s->x0.o = t.x0.o ^ (~t.x1.o & t.x2.o);
s->x2.e = t.x2.e ^ (~t.x3.e & t.x4.e);
s->x2.o = t.x2.o ^ (~t.x3.o & t.x4.o);
s->x4.e = t.x4.e ^ (~t.x0.e & t.x1.e);
s->x4.o = t.x4.o ^ (~t.x0.o & t.x1.o);
s->x1.e = t.x1.e ^ (~t.x2.e & t.x3.e);
s->x1.o = t.x1.o ^ (~t.x2.o & t.x3.o);
s->x3.e = t.x3.e ^ (~t.x4.e & t.x0.e);
s->x3.o = t.x3.o ^ (~t.x4.o & t.x0.o);
s->x1.e ^= s->x0.e;
s->x1.o ^= s->x0.o;
s->x3.e ^= s->x2.e;
s->x3.o ^= s->x2.o;
s->x0.e ^= s->x4.e;
s->x0.o ^= s->x4.o;
s->x0 = XOR(s->x0, s->x4);
s->x4 = XOR(s->x4, s->x3);
s->x2 = XOR(s->x2, s->x1);
t.x0 = XOR(s->x0, AND(NOT(s->x1), s->x2));
t.x2 = XOR(s->x2, AND(NOT(s->x3), s->x4));
t.x4 = XOR(s->x4, AND(NOT(s->x0), s->x1));
t.x1 = XOR(s->x1, AND(NOT(s->x2), s->x3));
t.x3 = XOR(s->x3, AND(NOT(s->x4), s->x0));
t.x1 = XOR(t.x1, t.x0);
t.x3 = XOR(t.x3, t.x2);
t.x0 = XOR(t.x0, t.x4);
/* linear layer */
t.x0.e = s->x0.e ^ ROR32(s->x0.o, 4);
t.x0.o = s->x0.o ^ ROR32(s->x0.e, 5);
t.x1.e = s->x1.e ^ ROR32(s->x1.e, 11);
t.x1.o = s->x1.o ^ ROR32(s->x1.o, 11);
t.x2.e = s->x2.e ^ ROR32(s->x2.o, 2);
t.x2.o = s->x2.o ^ ROR32(s->x2.e, 3);
t.x3.e = s->x3.e ^ ROR32(s->x3.o, 3);
t.x3.o = s->x3.o ^ ROR32(s->x3.e, 4);
t.x4.e = s->x4.e ^ ROR32(s->x4.e, 17);
t.x4.o = s->x4.o ^ ROR32(s->x4.o, 17);
s->x0.e ^= ROR32(t.x0.o, 9);
s->x0.o ^= ROR32(t.x0.e, 10);
s->x1.e ^= ROR32(t.x1.o, 19);
s->x1.o ^= ROR32(t.x1.e, 20);
s->x2.e ^= t.x2.o;
s->x2.o ^= ROR32(t.x2.e, 1);
s->x3.e ^= ROR32(t.x3.e, 5);
s->x3.o ^= ROR32(t.x3.o, 5);
s->x4.e ^= ROR32(t.x4.o, 3);
s->x4.o ^= ROR32(t.x4.e, 4);
s->x2.e = ~s->x2.e;
s->x2.o = ~s->x2.o;
s->x2 = XOR(t.x2, ROR(t.x2, 6 - 1));
s->x3 = XOR(t.x3, ROR(t.x3, 17 - 10));
s->x4 = XOR(t.x4, ROR(t.x4, 41 - 7));
s->x0 = XOR(t.x0, ROR(t.x0, 28 - 19));
s->x1 = XOR(t.x1, ROR(t.x1, 61 - 39));
s->x2 = XOR(t.x2, ROR(s->x2, 1));
s->x3 = XOR(t.x3, ROR(s->x3, 10));
s->x4 = XOR(t.x4, ROR(s->x4, 7));
s->x0 = XOR(t.x0, ROR(s->x0, 19));
s->x1 = XOR(t.x1, ROR(s->x1, 39));
s->x2 = NOT(s->x2);
printstate(" round output", s);
}
......
......@@ -4,6 +4,7 @@
#include <stdint.h>
#include <string.h>
#include "endian.h"
#include "forceinline.h"
#include "interleave.h"
typedef struct {
......@@ -11,102 +12,92 @@ typedef struct {
uint32_t o;
} word_t;
__forceinline uint32_t ROR32(uint32_t x, int n) {
/* Rotate the 32-bit value x right by n bits. n == 0 is handled separately so
   that no shift by the full width (32) is ever evaluated. */
forceinline uint32_t ROR32(uint32_t x, int n) {
  if (n == 0) {
    return x;
  }
  return (x >> n) | (x << (32 - n));
}
__forceinline word_t ROR64(word_t x, int n) {
/* Rotate a word held as two 32-bit halves (e, o) by n bit positions.
   For even n each half is rotated in place by n / 2; for odd n the halves
   swap roles, e taking o rotated by (n - 1) / 2 and o taking e rotated by
   (n + 1) / 2. */
forceinline word_t ROR(word_t x, int n) {
  word_t r;
  if (n % 2) {
    r.e = ROR32(x.o, (n - 1) / 2);
    r.o = ROR32(x.e, (n + 1) / 2);
  } else {
    r.e = ROR32(x.e, n / 2);
    r.o = ROR32(x.o, n / 2);
  }
  return r;
}
__forceinline word_t WORD_T(uint64_t x) {
return (word_t){.o = x >> 32, .e = x};
}
/* Split a 64-bit value into a word: the upper 32 bits go to .o, the lower
   32 bits to .e (no bit reordering is performed here). */
forceinline word_t WORD_T(uint64_t x) {
  const uint64_t upper = x >> 32;
  return (word_t){.o = upper, .e = x};
}
__forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; }
/* Join the two halves of a word into one 64-bit value: .o becomes the upper
   32 bits and .e the lower 32 bits (inverse of WORD_T). */
forceinline uint64_t UINT64_T(word_t x) {
  const uint64_t upper = (uint64_t)x.o << 32;
  return upper | x.e;
}
__forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(deinterleave32(x)); }
/* Convert a plain 64-bit value to a word: run it through deinterleave32 and
   split the result into halves with WORD_T. */
forceinline word_t U64TOWORD(uint64_t x) {
  const uint64_t split = deinterleave32(x);
  return WORD_T(split);
}
__forceinline uint64_t WORDTOU64(word_t w) { return interleave32(UINT64_T(w)); }
/* Convert a word back to a plain 64-bit value: join the halves with UINT64_T
   and run the result through interleave32. */
forceinline uint64_t WORDTOU64(word_t w) {
  const uint64_t joined = UINT64_T(w);
  return interleave32(joined);
}
__forceinline word_t NOT(word_t a) {
/* Bitwise complement of both halves of a word. */
forceinline word_t NOT(word_t a) {
  word_t r;
  r.e = ~a.e;
  r.o = ~a.o;
  return r;
}
__forceinline word_t XOR(word_t a, word_t b) {
/* Bitwise exclusive-or of two words, computed half by half. */
forceinline word_t XOR(word_t a, word_t b) {
  word_t r;
  r.e = a.e ^ b.e;
  r.o = a.o ^ b.o;
  return r;
}
__forceinline word_t AND(word_t a, word_t b) {
/* Bitwise and of two words, computed half by half. */
forceinline word_t AND(word_t a, word_t b) {
  word_t r;
  r.e = a.e & b.e;
  r.o = a.o & b.o;
  return r;
}
__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) {
forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) {
word_t r;
r.o = lo2hi.o << 16 | hi2lo.o >> 16;
r.e = lo2hi.e << 16 | hi2lo.e >> 16;
r.o = lo2hi.o << 16 | hi2lo.o >> 16;
return r;
}
__forceinline uint8_t NOTZERO(word_t a, word_t b) {
forceinline int NOTZERO(word_t a, word_t b) {
uint32_t result = a.e | a.o | b.e | b.o;
result |= result >> 16;
result |= result >> 8;
return (uint8_t)result;
return ((((int)(result & 0xff) - 1) >> 8) & 1) - 1;
}
__forceinline word_t PAD(int i) {
return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32);
/* Build the padding word for position i: the value 0x8 shifted left by
   (28 - 4 * i) bits is placed in the upper 32 bits handed to WORD_T, i.e. it
   lands in the .o half; the .e half is zero. */
forceinline word_t PAD(int i) {
  const int shift = 28 - 4 * i;
  const uint64_t upper = (uint64_t)(0x8ul << shift);
  return WORD_T(upper << 32);
}
__forceinline word_t CLEAR(word_t w, int n) {
forceinline word_t CLEAR(word_t w, int n) {
/* undefined for n == 0 */
uint32_t mask = 0x0fffffff >> (n * 4 - 4);
return AND(w, WORD_T((uint64_t)mask << 32 | mask));
w.e &= mask;
w.o &= mask;
return w;
}
__forceinline uint64_t MASK(int n) {
/* Return a 64-bit mask with the low 8 * n bits set.
   For 1 <= n <= 8 this is ~0ull >> (64 - 8 * n), as before. The boundary
   cases are now defined instead of shifting by an out-of-range count:
   n <= 0 yields 0 and n >= 8 yields all ones. */
forceinline uint64_t MASK(int n) {
  if (n <= 0) {
    return 0;
  }
  if (n >= 8) {
    return ~0ull;
  }
  return ~0ull >> (64 - 8 * n);
}
__forceinline word_t LOAD64(const uint8_t* bytes) {
uint64_t x = *(uint64_t*)bytes;
return U64TOWORD(U64BIG(x));
}
__forceinline void STORE64(uint8_t* bytes, word_t w) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes = U64BIG(x);
}
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
/* Load the first n bytes (0 <= n <= 8) at `bytes` as the leading bytes of a
   big-endian 64-bit value, zero-filling the rest, and convert it to a word.
   Exactly n bytes are read through memcpy, so the buffer is never read past
   n, there is no unaligned or type-punned 64-bit access, and the same bytes
   are selected on little- and big-endian hosts. */
forceinline word_t LOAD(const uint8_t* bytes, int n) {
  uint64_t x = 0;
  memcpy(&x, bytes, (size_t)n);
  return U64TOWORD(U64BIG(x));
}
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
/* Store the leading n bytes (0 <= n <= 8) of the big-endian representation
   of word w to `bytes`.
   Exactly n bytes are written through memcpy; bytes[n..7] are neither read
   nor modified, so short output buffers are not overrun and bits of w beyond
   the first n bytes can no longer leak into neighbouring memory. */
forceinline void STORE(uint8_t* bytes, word_t w, int n) {
  uint64_t x = WORDTOU64(w);
  const uint64_t be = U64BIG(x);
  memcpy(bytes, &be, (size_t)n);
}
__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) {
/* Assemble the first n bytes (0 <= n <= 8) at `bytes` into a 64-bit value,
   bytes[0] becoming the most significant byte, and convert it to a word.
   Shifts are used instead of indexing the bytes of x, so the result does not
   depend on the host byte order (on little-endian hosts it is unchanged). */
forceinline word_t LOADBYTES(const uint8_t* bytes, int n) {
  uint64_t x = 0;
  for (int i = 0; i < n; ++i) {
    x |= (uint64_t)bytes[i] << (56 - 8 * i);
  }
  return U64TOWORD(x);
}
__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) {
/* Write the n most significant bytes (0 <= n <= 8) of word w to `bytes`,
   most significant byte first.
   Shifts are used instead of indexing the bytes of x, so the output does not
   depend on the host byte order (on little-endian hosts it is unchanged). */
forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) {
  const uint64_t x = WORDTOU64(w);
  for (int i = 0; i < n; ++i) {
    bytes[i] = (uint8_t)(x >> (56 - 8 * i));
  }
}
......
#define CRYPTO_VERSION "1.2.5"
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#define ASCON_RATE 16
#define ASCON_AEAD_RATE 16
......@@ -3,15 +3,14 @@
#include <stdint.h>
#include "config.h"
#include "word.h"
typedef struct {
word_t x0, x1, x2, x3, x4;
} state_t;
void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k);
void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen);
void ascon_aeadinit(state_t* s, const uint8_t* npub, const uint8_t* k);
void ascon_adata(state_t* s, const uint8_t* ad, uint64_t adlen);
void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen);
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen);
void ascon_final(state_t* s, const uint8_t* k);
......
#ifndef CONFIG_H_
#define CONFIG_H_
/* inline the Ascon mode */
/* inline the ascon mode */
#ifndef ASCON_INLINE_MODE
#define ASCON_INLINE_MODE 1
#define ASCON_INLINE_MODE 0
#endif
/* inline the Ascon permutations */
/* inline all permutations */
#ifndef ASCON_INLINE_PERM
#define ASCON_INLINE_PERM 0
#endif
/* single function for all permutations */
#ifndef ASCON_SINGLE_PERM
#define ASCON_SINGLE_PERM 0
#endif
/* unroll the permutation loops */
/* unroll permutation loops */
#ifndef ASCON_UNROLL_LOOPS
#define ASCON_UNROLL_LOOPS 1
#endif
/* make sure __forceinline is supported */
#ifndef __forceinline
#define __forceinline inline __attribute__((always_inline))
#endif
#endif /* CONFIG_H_ */
......@@ -4,7 +4,7 @@
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
/* macros for big endian machines */
#ifndef NDEBUG
#ifdef PRAGMA_ENDIAN
#pragma message("Using macros for big endian machines")
#endif
#define U64BIG(x) (x)
......@@ -15,7 +15,7 @@
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
/* macros for little endian machines */
#ifndef NDEBUG
#ifdef PRAGMA_ENDIAN
#pragma message("Using macros for little endian machines")
#endif
#define U64BIG(x) \
......
......@@ -3,7 +3,9 @@
#include <stdint.h>
__forceinline uint32_t deinterleave_uint32(uint32_t x) {
#include "forceinline.h"
forceinline uint32_t deinterleave_uint32(uint32_t x) {
uint32_t t;
t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1);
t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2);
......@@ -12,7 +14,7 @@ __forceinline uint32_t deinterleave_uint32(uint32_t x) {
return x;
}
__forceinline uint32_t interleave_uint32(uint32_t x) {
forceinline uint32_t interleave_uint32(uint32_t x) {
uint32_t t;
t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8);
t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4);
......@@ -22,7 +24,7 @@ __forceinline uint32_t interleave_uint32(uint32_t x) {
}
/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
__forceinline uint64_t deinterleave32(uint64_t in) {
forceinline uint64_t deinterleave32(uint64_t in) {
uint32_t hi = in >> 32;
uint32_t lo = in;
uint32_t r0, r1;
......@@ -34,7 +36,7 @@ __forceinline uint64_t deinterleave32(uint64_t in) {
}
/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
__forceinline uint64_t interleave32(uint64_t in) {
forceinline uint64_t interleave32(uint64_t in) {
uint32_t r0 = in;
uint32_t r1 = in >> 32;
uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16);
......
#include "permutations.h"
#include "round.h"
#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM
#if !ASCON_UNROLL_LOOPS
const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9},
{0x6, 0xc}, {0x3, 0xc}, {0x6, 0x9}, {0x3, 0x9},
......@@ -10,35 +8,16 @@ const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9},
#endif
#if ASCON_INLINE_PERM
#elif ASCON_SINGLE_PERM
void P(state_t* s, uint8_t rounds) {
printstate(" permutation input", s);
for (int i = START(rounds); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
}
#if !ASCON_INLINE_PERM && ASCON_UNROLL_LOOPS
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */
/* Out-of-line entry point for the unrolled 12-round permutation. */
void P12(state_t* s) {
  P12ROUNDS(s);
}

/* Out-of-line entry point for the unrolled 8-round permutation. */
void P8(state_t* s) {
  P8ROUNDS(s);
}

/* Out-of-line entry point for the unrolled 6-round permutation. */
void P6(state_t* s) {
  P6ROUNDS(s);
}
void P12(state_t* s) {
printstate(" permutation input", s);
P12ROUNDS(s);
}
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16
void P8(state_t* s) {
printstate(" permutation input", s);
P8ROUNDS(s);
}
#endif
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8
void P6(state_t* s) {
printstate(" permutation input", s);
P6ROUNDS(s);
}
#endif
#if !ASCON_INLINE_PERM && !ASCON_UNROLL_LOOPS
/* Out-of-line permutation with a run-time round count; forwards to the
   rolled-loop implementation PROUNDS. */
void P(state_t* s, int nr) {
  PROUNDS(s, nr);
}
#endif
......@@ -5,6 +5,7 @@
#include "api.h"
#include "ascon.h"
#include "config.h"
#include "printstate.h"
#include "round.h"
......@@ -14,154 +15,124 @@
#define ASCON_128_RATE 8
#define ASCON_128A_RATE 16
#define ASCON_HASH_RATE 8
#define ASCON_128_PA_ROUNDS 12
#define ASCON_128_PB_ROUNDS 6
#define ASCON_128A_PA_ROUNDS 12
#define ASCON_128A_PB_ROUNDS 8
#define ASCON_HASH_BYTES 32
#define ASCON_HASH_PA_ROUNDS 12
#define ASCON_HASH_PB_ROUNDS 12
#define ASCON_128_IV WORD_T(0x8021000008220000)
#define ASCON_128A_IV WORD_T(0x8822000000200000)
#define ASCON_80PQ_IV WORD_T(0xc021000008220000)
#define ASCON_HASH_IV WORD_T(0x0020000008020010)
#define ASCON_XOF_IV WORD_T(0x0020000008020000)
#define ASCON_HASH_IV0 WORD_T(0xf9afb5c6a540dbc7)
#define ASCON_HASH_IV1 WORD_T(0xbd2493011445a340)
#define ASCON_HASH_IV2 WORD_T(0xcb9ba8b5604d4fc8)
#define ASCON_HASH_IV3 WORD_T(0x12a4eede94514c98)
#define ASCON_HASH_IV4 WORD_T(0x4bca84c06339f398)
#define ASCON_XOF_IV0 WORD_T(0xc75782817e351ae6)
#define ASCON_XOF_IV1 WORD_T(0x70045f441d238220)
#define ASCON_XOF_IV2 WORD_T(0x5dd5ab52a13e3f04)
#define ASCON_XOF_IV3 WORD_T(0x3e378142c30c1db2)
#define ASCON_XOF_IV4 WORD_T(0x3735189db624d656)
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16
#define IV ASCON_128_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 6
#define PB P6
#endif
#define ASCON_HASHA_PA_ROUNDS 12
#define ASCON_HASHA_PB_ROUNDS 8
#if ASCON_RATE == 16
#define IV ASCON_128A_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 8
#define PB P8
#endif
#define ASCON_HASH_BYTES 32
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20
#define IV ASCON_80PQ_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 6
#define PB P6
#endif
#define ASCON_128_IV WORD_T(0x8021000008220000ull)
#define ASCON_128A_IV WORD_T(0x8822000000200000ull)
#define ASCON_80PQ_IV WORD_T(0xc021000008220000ull)
#define ASCON_HASH_IV WORD_T(0x0020000008020010ull)
#define ASCON_XOF_IV WORD_T(0x0020000008020000ull)
#define ASCON_HASH_IV0 WORD_T(0xf9afb5c6a540dbc7ull)
#define ASCON_HASH_IV1 WORD_T(0xbd2493011445a340ull)
#define ASCON_HASH_IV2 WORD_T(0xcb9ba8b5604d4fc8ull)
#define ASCON_HASH_IV3 WORD_T(0x12a4eede94514c98ull)
#define ASCON_HASH_IV4 WORD_T(0x4bca84c06339f398ull)
#define ASCON_HASHA_IV0 WORD_T(0x0108e46d1b16eb02ull)
#define ASCON_HASHA_IV1 WORD_T(0x5b9b8efdd29083f3ull)
#define ASCON_HASHA_IV2 WORD_T(0x7ad665622891ae4aull)
#define ASCON_HASHA_IV3 WORD_T(0x9dc27156ee3bfc7full)
#define ASCON_HASHA_IV4 WORD_T(0xc61d5fa916801633ull)
#define ASCON_XOF_IV0 WORD_T(0xc75782817e351ae6ull)
#define ASCON_XOF_IV1 WORD_T(0x70045f441d238220ull)
#define ASCON_XOF_IV2 WORD_T(0x5dd5ab52a13e3f04ull)
#define ASCON_XOF_IV3 WORD_T(0x3e378142c30c1db2ull)
#define ASCON_XOF_IV4 WORD_T(0x3735189db624d656ull)
#define ASCON_XOFA_IV0 WORD_T(0x0846d7a5a4b87d44ull)
#define ASCON_XOFA_IV1 WORD_T(0xaa6f1005b3a2dbf4ull)
#define ASCON_XOFA_IV2 WORD_T(0xdc451146f713e811ull)
#define ASCON_XOFA_IV3 WORD_T(0x468cb2532839e30dull)
#define ASCON_XOFA_IV4 WORD_T(0xeb2d429709e96977ull)
/* Index of the first round constant to use when running n of the 12 rounds.
   The argument is parenthesized so that expression arguments such as
   START(a - b) expand to 12 - (a - b) rather than 12 - a - b. */
#define START(n) (12 - (n))
#if ASCON_UNROLL_LOOPS
__forceinline void P12ROUNDS(state_t* s) {
ROUND(s, 0xc, 0xc);
ROUND(s, 0x9, 0xc);
ROUND(s, 0xc, 0x9);
ROUND(s, 0x9, 0x9);
ROUND(s, 0x6, 0xc);
ROUND(s, 0x3, 0xc);
ROUND(s, 0x6, 0x9);
ROUND(s, 0x3, 0x9);
ROUND(s, 0xc, 0x6);
ROUND(s, 0x9, 0x6);
ROUND(s, 0xc, 0x3);
ROUND(s, 0x9, 0x3);
/* Pack a round-constant pair into the value handed to WORD_T: o goes into the
   upper 32 bits, e into the lower 32 bits. Both arguments are parenthesized so
   the cast and shift apply to the whole argument expression. */
#define RC(e, o) WORD_T((uint64_t)(o) << 32 | (e))
/* Apply 12 rounds to the state s, fully unrolled: one ROUND call per
   round-constant pair, each pair packed into a word_t by RC. */
forceinline void P12ROUNDS(state_t* s) {
ROUND(s, RC(0xc, 0xc));
ROUND(s, RC(0x9, 0xc));
ROUND(s, RC(0xc, 0x9));
ROUND(s, RC(0x9, 0x9));
ROUND(s, RC(0x6, 0xc));
ROUND(s, RC(0x3, 0xc));
ROUND(s, RC(0x6, 0x9));
ROUND(s, RC(0x3, 0x9));
ROUND(s, RC(0xc, 0x6));
ROUND(s, RC(0x9, 0x6));
ROUND(s, RC(0xc, 0x3));
ROUND(s, RC(0x9, 0x3));
}
__forceinline void P8ROUNDS(state_t* s) {
ROUND(s, 0x6, 0xc);
ROUND(s, 0x3, 0xc);
ROUND(s, 0x6, 0x9);
ROUND(s, 0x3, 0x9);
ROUND(s, 0xc, 0x6);
ROUND(s, 0x9, 0x6);
ROUND(s, 0xc, 0x3);
ROUND(s, 0x9, 0x3);
/* Apply 8 rounds to the state s, fully unrolled. The constant pairs are the
   same as the last eight ROUND calls of P12ROUNDS, in the same order. */
forceinline void P8ROUNDS(state_t* s) {
ROUND(s, RC(0x6, 0xc));
ROUND(s, RC(0x3, 0xc));
ROUND(s, RC(0x6, 0x9));
ROUND(s, RC(0x3, 0x9));
ROUND(s, RC(0xc, 0x6));
ROUND(s, RC(0x9, 0x6));
ROUND(s, RC(0xc, 0x3));
ROUND(s, RC(0x9, 0x3));
}
__forceinline void P6ROUNDS(state_t* s) {
ROUND(s, 0x6, 0x9);
ROUND(s, 0x3, 0x9);
ROUND(s, 0xc, 0x6);
ROUND(s, 0x9, 0x6);
ROUND(s, 0xc, 0x3);
ROUND(s, 0x9, 0x3);
/* Apply 6 rounds to the state s, fully unrolled. The constant pairs are the
   same as the last six ROUND calls of P12ROUNDS, in the same order. */
forceinline void P6ROUNDS(state_t* s) {
ROUND(s, RC(0x6, 0x9));
ROUND(s, RC(0x3, 0x9));
ROUND(s, RC(0xc, 0x6));
ROUND(s, RC(0x9, 0x6));
ROUND(s, RC(0xc, 0x3));
ROUND(s, RC(0x9, 0x3));
}
#else /* !ASCON_UNROLL_LOOPS */
extern const uint8_t constants[][2];
__forceinline void P12ROUNDS(state_t* s) {
for (int i = START(12); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
/* Looped permutation: run ROUND on s once for every entry of the `constants`
   table from index START(nr) up to (but not including) index 12. */
forceinline void PROUNDS(state_t* s, int nr) {
  int round = START(nr);
  while (round < 12) {
    ROUND(s, RC(constants[round][0], constants[round][1]));
    ++round;
  }
}
__forceinline void P8ROUNDS(state_t* s) {
for (int i = START(8); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
}
#if ASCON_INLINE_PERM && ASCON_UNROLL_LOOPS
__forceinline void P6ROUNDS(state_t* s) {
for (int i = START(6); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
/* Inline permutation dispatcher: selects the unrolled 12-, 8- or 6-round
   routine according to nr. Any other nr leaves s untouched. */
forceinline void P(state_t* s, int nr) {
  switch (nr) {
    case 12:
      P12ROUNDS(s);
      break;
    case 8:
      P8ROUNDS(s);
      break;
    case 6:
      P6ROUNDS(s);
      break;
    default:
      break;
  }
}
#endif
#if ASCON_INLINE_PERM
__forceinline void P12(state_t* s) {
printstate(" permutation input", s);
P12ROUNDS(s);
}
#elif !ASCON_INLINE_PERM && ASCON_UNROLL_LOOPS
__forceinline void P8(state_t* s) {
printstate(" permutation input", s);
P8ROUNDS(s);
}
__forceinline void P6(state_t* s) {
printstate(" permutation input", s);
P6ROUNDS(s);
}
void P12(state_t* s);
void P8(state_t* s);
void P6(state_t* s);
__forceinline void P(state_t* s, int i) {
if (i == 12) P12(s);
if (i == 8) P8(s);
if (i == 6) P6(s);
/* Permutation dispatcher: forwards to P12, P8 or P6 according to nr.
   Any other nr leaves s untouched. */
forceinline void P(state_t* s, int nr) {
  switch (nr) {
    case 12:
      P12(s);
      break;
    case 8:
      P8(s);
      break;
    case 6:
      P6(s);
      break;
    default:
      break;
  }
}
#elif ASCON_SINGLE_PERM
#elif ASCON_INLINE_PERM && !ASCON_UNROLL_LOOPS
#define P12(s) P(s, 12)
#define P8(s) P(s, 8)
#define P6(s) P(s, 6)
forceinline void P(state_t* s, int nr) { PROUNDS(s, nr); }
void P(state_t* s, uint8_t rounds);
#else /* !ASCON_INLINE_PERM && !ASCON_UNROLL_LOOPS */
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */
void P12(state_t* s);
void P8(state_t* s);
void P6(state_t* s);
__forceinline void P(state_t* s, int i) {
if (i == 12) P12(s);
if (i == 8) P8(s);
if (i == 6) P6(s);
}
void P(state_t* s, int nr);
#endif
......
#ifndef PRINTSTATE_H_
#define PRINTSTATE_H_
#ifdef NDEBUG
#ifdef ASCON_PRINTSTATE
#define printword(text, w)
#define printstate(text, s)
#include "ascon.h"
#include "word.h"
#else
void printword(const char* text, const word_t x);
void printstate(const char* text, const state_t* s);
#include <inttypes.h>
#include <stdio.h>
#else
#include "ascon.h"
#include "word.h"
#define printword(text, w) \
do { \
} while (0)
__forceinline void printword(const char* text, const word_t x) {
printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x));
}
__forceinline void printstate(const char* text, const state_t* s) {
printf("%s:\n", text);
printword(" x0", s->x0);
printword(" x1", s->x1);
printword(" x2", s->x2);
printword(" x3", s->x3);
printword(" x4", s->x4);
}
#define printstate(text, s) \
do { \
} while (0)
#endif
......
......@@ -4,13 +4,13 @@
#include "ascon.h"
#include "printstate.h"
__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) {
/* Set the three key words pointed to by K0, K1 and K2 to WORD_T(0). */
forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) {
  const word_t zero = WORD_T(0);
  *K0 = zero;
  *K1 = zero;
  *K2 = zero;
}
__forceinline void PINIT(state_t* s) {
forceinline void PINIT(state_t* s) {
s->x0 = WORD_T(0);
s->x1 = WORD_T(0);
s->x2 = WORD_T(0);
......@@ -18,7 +18,7 @@ __forceinline void PINIT(state_t* s) {
s->x4 = WORD_T(0);
}
__forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) {
forceinline void ROUND(state_t* s, word_t C) {
uint32_t tmp0, tmp1, tmp2, tmp3;
/* clang-format off */
__asm__ __volatile__( \
......@@ -92,8 +92,8 @@ __forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) {
[ tmp1 ] "=r"(tmp1), \
[ tmp2 ] "=r"(tmp2), \
[ tmp3 ] "=r"(tmp3) \
: [ C_e ] "i"(C_e), \
[ C_o ] "i"(C_o) \
: [ C_e ] "ri"(C.e), \
[ C_o ] "ri"(C.o) \
: );
/* clang-format on */
printstate(" round output", s);
......
......@@ -4,6 +4,7 @@
#include <stdint.h>
#include "endian.h"
#include "forceinline.h"
#include "interleave.h"
typedef struct {
......@@ -11,102 +12,92 @@ typedef struct {
uint32_t o;
} word_t;
__forceinline uint32_t ROR32(uint32_t x, int n) {
/* Rotate the 32-bit value x right by n bits. n == 0 is returned unchanged
   because the general formula would shift left by the full 32-bit width. */
forceinline uint32_t ROR32(uint32_t x, int n) {
  if (n == 0) {
    return x;
  }
  const uint32_t low_part = x >> n;
  const uint32_t wrapped = x << (32 - n);
  return low_part | wrapped;
}
__forceinline word_t ROR64(word_t x, int n) {
/* Rotate a word_t right by n, operating on its two 32-bit halves e and o.
   For even n each half is rotated by n / 2 in place; for odd n the halves
   swap roles: the new e is o rotated by (n - 1) / 2 and the new o is e
   rotated by (n + 1) / 2. */
forceinline word_t ROR(word_t x, int n) {
  word_t r;
  if (n % 2) {
    r.e = ROR32(x.o, (n - 1) / 2);
    r.o = ROR32(x.e, (n + 1) / 2);
  } else {
    r.e = ROR32(x.e, n / 2);
    r.o = ROR32(x.o, n / 2);
  }
  return r;
}
__forceinline word_t WORD_T(uint64_t x) {
return (word_t){.o = x >> 32, .e = x};
}
forceinline word_t WORD_T(uint64_t x) { return (word_t){.o = x >> 32, .e = x}; }
__forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; }
forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; }
__forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(deinterleave32(x)); }
forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(deinterleave32(x)); }
__forceinline uint64_t WORDTOU64(word_t w) { return interleave32(UINT64_T(w)); }
forceinline uint64_t WORDTOU64(word_t w) { return interleave32(UINT64_T(w)); }
__forceinline word_t NOT(word_t a) {
/* Bitwise complement of both halves (e and o) of a. */
forceinline word_t NOT(word_t a) {
  word_t r = a;
  r.e = ~a.e;
  r.o = ~a.o;
  return r;
}
__forceinline word_t XOR(word_t a, word_t b) {
/* Half-wise exclusive OR of a and b (e with e, o with o). */
forceinline word_t XOR(word_t a, word_t b) {
  word_t r = a;
  r.e = a.e ^ b.e;
  r.o = a.o ^ b.o;
  return r;
}
__forceinline word_t AND(word_t a, word_t b) {
/* Half-wise bitwise AND of a and b (e with e, o with o). */
forceinline word_t AND(word_t a, word_t b) {
  word_t r = a;
  r.e = a.e & b.e;
  r.o = a.o & b.o;
  return r;
}
__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) {
forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) {
word_t r;
r.o = lo2hi.o << 16 | hi2lo.o >> 16;
r.e = lo2hi.e << 16 | hi2lo.e >> 16;
r.o = lo2hi.o << 16 | hi2lo.o >> 16;
return r;
}
__forceinline uint8_t NOTZERO(word_t a, word_t b) {
forceinline int NOTZERO(word_t a, word_t b) {
uint32_t result = a.e | a.o | b.e | b.o;
result |= result >> 16;
result |= result >> 8;
return (uint8_t)result;
return ((((int)(result & 0xff) - 1) >> 8) & 1) - 1;
}
__forceinline word_t PAD(int i) {
return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32);
forceinline word_t PAD(int i) {
return WORD_T((uint64_t)(0x8ul << (28 - 4 * i)) << 32);
}
__forceinline word_t CLEAR(word_t w, int n) {
forceinline word_t CLEAR(word_t w, int n) {
/* undefined for n == 0 */
uint32_t mask = 0x0fffffff >> (n * 4 - 4);
return AND(w, WORD_T((uint64_t)mask << 32 | mask));
w.e &= mask;
w.o &= mask;
return w;
}
__forceinline uint64_t MASK(int n) {
/* Return a 64-bit value with the low 8 * n bits set and all others clear. */
forceinline uint64_t MASK(int n) {
  /* undefined for n == 0 */
  const int dropped_bits = 64 - 8 * n;
  return ~(uint64_t)0 >> dropped_bits;
}
__forceinline word_t LOAD64(const uint8_t* bytes) {
uint64_t x = *(uint64_t*)bytes;
return U64TOWORD(U64BIG(x));
}
__forceinline void STORE64(uint8_t* bytes, word_t w) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes = U64BIG(x);
}
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
/* Read a 64-bit value from `bytes`, keep only the bits selected by MASK(n),
   convert it with U64BIG and return it in word_t form via U64TOWORD.
   n == 0 is not supported because MASK is undefined for it. */
forceinline word_t LOAD(const uint8_t* bytes, int n) {
/* NOTE(review): this dereferences a full uint64_t regardless of n, so 8 bytes
   are read; the cast also assumes `bytes` may be accessed as uint64_t
   (alignment / aliasing) -- confirm for the target platform. */
uint64_t x = *(uint64_t*)bytes & MASK(n);
return U64TOWORD(U64BIG(x));
}
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
/* Write w to `bytes` as a read-modify-write of one 64-bit value: the bits
   selected by MASK(n) are cleared, then U64BIG(WORDTOU64(w)) is ORed in.
   n == 0 is not supported because MASK is undefined for it. */
forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
/* NOTE(review): both statements access a full uint64_t at `bytes`, so 8 bytes
   are read and written; confirm alignment / aliasing for the target. */
*(uint64_t*)bytes &= ~MASK(n);
/* NOTE(review): x is not masked here; presumably callers pass a w whose bits
   outside the first n bytes are already zero -- verify against callers. */
*(uint64_t*)bytes |= U64BIG(x);
}
__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) {
/* Load n bytes (0 <= n <= 8) from `bytes` into the most significant bytes of
   a 64-bit value (bytes[0] ends up in bits 63..56), zero-filling the rest,
   and return it in word_t form via U64TOWORD.
   The value is assembled with shifts instead of by indexing the object
   representation of the integer, so the result does not depend on the byte
   order of the host; on little-endian hosts it is unchanged. */
forceinline word_t LOADBYTES(const uint8_t* bytes, int n) {
  uint64_t x = 0;
  for (int i = 0; i < n; ++i) {
    x |= (uint64_t)bytes[i] << (56 - 8 * i);
  }
  return U64TOWORD(x);
}
__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) {
/* Store the n most significant bytes (0 <= n <= 8) of WORDTOU64(w) to
   `bytes`, most significant byte first (bits 63..56 go to bytes[0]).
   The bytes are extracted with shifts instead of by indexing the object
   representation of the integer, so the result does not depend on the byte
   order of the host; on little-endian hosts it is unchanged. */
forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) {
  const uint64_t x = WORDTOU64(w);
  for (int i = 0; i < n; ++i) {
    bytes[i] = (uint8_t)(x >> (56 - 8 * i));
  }
}
......
#define CRYPTO_VERSION "1.2.5"
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#define ASCON_RATE 16
#define ASCON_AEAD_RATE 16
......@@ -3,15 +3,14 @@
#include <stdint.h>
#include "config.h"
#include "word.h"
typedef struct {
word_t x0, x1, x2, x3, x4;
} state_t;
void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k);
void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen);
void ascon_aeadinit(state_t* s, const uint8_t* npub, const uint8_t* k);
void ascon_adata(state_t* s, const uint8_t* ad, uint64_t adlen);
void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen);
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen);
void ascon_final(state_t* s, const uint8_t* k);
......
#ifndef CONFIG_H_
#define CONFIG_H_
/* inline the Ascon mode */
/* inline the ascon mode */
#ifndef ASCON_INLINE_MODE
#define ASCON_INLINE_MODE 1
#define ASCON_INLINE_MODE 0
#endif
/* inline the Ascon permutations */
/* inline all permutations */
#ifndef ASCON_INLINE_PERM
#define ASCON_INLINE_PERM 0
#define ASCON_INLINE_PERM 1
#endif
/* single function for all permutations */
#ifndef ASCON_SINGLE_PERM
#define ASCON_SINGLE_PERM 0
#endif
/* unroll the permutation loops */
/* unroll permutation loops */
#ifndef ASCON_UNROLL_LOOPS
#define ASCON_UNROLL_LOOPS 1
#endif
/* make sure __forceinline is supported */
#ifndef __forceinline
#define __forceinline inline __attribute__((always_inline))
#define ASCON_UNROLL_LOOPS 0
#endif
#endif /* CONFIG_H_ */
......@@ -4,7 +4,7 @@
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
/* macros for big endian machines */
#ifndef NDEBUG
#ifdef PRAGMA_ENDIAN
#pragma message("Using macros for big endian machines")
#endif
#define U64BIG(x) (x)
......@@ -15,7 +15,7 @@
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
/* macros for little endian machines */
#ifndef NDEBUG
#ifdef PRAGMA_ENDIAN
#pragma message("Using macros for little endian machines")
#endif
#define U64BIG(x) \
......
......@@ -3,7 +3,9 @@
#include <stdint.h>
__forceinline uint32_t deinterleave_uint32(uint32_t x) {
#include "forceinline.h"
forceinline uint32_t deinterleave_uint32(uint32_t x) {
uint32_t t;
t = (x ^ (x >> 1)) & 0x22222222, x ^= t ^ (t << 1);
t = (x ^ (x >> 2)) & 0x0C0C0C0C, x ^= t ^ (t << 2);
......@@ -12,7 +14,7 @@ __forceinline uint32_t deinterleave_uint32(uint32_t x) {
return x;
}
__forceinline uint32_t interleave_uint32(uint32_t x) {
forceinline uint32_t interleave_uint32(uint32_t x) {
uint32_t t;
t = (x ^ (x >> 8)) & 0x0000FF00, x ^= t ^ (t << 8);
t = (x ^ (x >> 4)) & 0x00F000F0, x ^= t ^ (t << 4);
......@@ -22,7 +24,7 @@ __forceinline uint32_t interleave_uint32(uint32_t x) {
}
/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
__forceinline uint64_t deinterleave32(uint64_t in) {
forceinline uint64_t deinterleave32(uint64_t in) {
uint32_t hi = in >> 32;
uint32_t lo = in;
uint32_t r0, r1;
......@@ -34,7 +36,7 @@ __forceinline uint64_t deinterleave32(uint64_t in) {
}
/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
__forceinline uint64_t interleave32(uint64_t in) {
forceinline uint64_t interleave32(uint64_t in) {
uint32_t r0 = in;
uint32_t r1 = in >> 32;
uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16);
......
#include "permutations.h"
#include "round.h"
#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM
#if !ASCON_UNROLL_LOOPS
const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9},
{0x6, 0xc}, {0x3, 0xc}, {0x6, 0x9}, {0x3, 0x9},
......@@ -10,35 +8,16 @@ const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9},
#endif
#if ASCON_INLINE_PERM
#elif ASCON_SINGLE_PERM
void P(state_t* s, uint8_t rounds) {
printstate(" permutation input", s);
for (int i = START(rounds); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
}
#if !ASCON_INLINE_PERM && ASCON_UNROLL_LOOPS
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */
void P12(state_t* s) { P12ROUNDS(s); }
void P8(state_t* s) { P8ROUNDS(s); }
void P6(state_t* s) { P6ROUNDS(s); }
void P12(state_t* s) {
printstate(" permutation input", s);
P12ROUNDS(s);
}
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16
void P8(state_t* s) {
printstate(" permutation input", s);
P8ROUNDS(s);
}
#endif
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8
void P6(state_t* s) {
printstate(" permutation input", s);
P6ROUNDS(s);
}
#endif
#if !ASCON_INLINE_PERM && !ASCON_UNROLL_LOOPS
void P(state_t* s, int nr) { PROUNDS(s, nr); }
#endif
......@@ -5,6 +5,7 @@
#include "api.h"
#include "ascon.h"
#include "config.h"
#include "printstate.h"
#include "round.h"
......@@ -14,154 +15,124 @@
#define ASCON_128_RATE 8
#define ASCON_128A_RATE 16
#define ASCON_HASH_RATE 8
#define ASCON_128_PA_ROUNDS 12
#define ASCON_128_PB_ROUNDS 6
#define ASCON_128A_PA_ROUNDS 12
#define ASCON_128A_PB_ROUNDS 8
#define ASCON_HASH_BYTES 32
#define ASCON_HASH_PA_ROUNDS 12
#define ASCON_HASH_PB_ROUNDS 12
#define ASCON_128_IV WORD_T(0x8021000008220000)
#define ASCON_128A_IV WORD_T(0x8822000000200000)
#define ASCON_80PQ_IV WORD_T(0xc021000008220000)
#define ASCON_HASH_IV WORD_T(0x0020000008020010)
#define ASCON_XOF_IV WORD_T(0x0020000008020000)
#define ASCON_HASH_IV0 WORD_T(0xf9afb5c6a540dbc7)
#define ASCON_HASH_IV1 WORD_T(0xbd2493011445a340)
#define ASCON_HASH_IV2 WORD_T(0xcb9ba8b5604d4fc8)
#define ASCON_HASH_IV3 WORD_T(0x12a4eede94514c98)
#define ASCON_HASH_IV4 WORD_T(0x4bca84c06339f398)
#define ASCON_XOF_IV0 WORD_T(0xc75782817e351ae6)
#define ASCON_XOF_IV1 WORD_T(0x70045f441d238220)
#define ASCON_XOF_IV2 WORD_T(0x5dd5ab52a13e3f04)
#define ASCON_XOF_IV3 WORD_T(0x3e378142c30c1db2)
#define ASCON_XOF_IV4 WORD_T(0x3735189db624d656)
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16
#define IV ASCON_128_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 6
#define PB P6
#endif
#define ASCON_HASHA_PA_ROUNDS 12
#define ASCON_HASHA_PB_ROUNDS 8
#if ASCON_RATE == 16
#define IV ASCON_128A_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 8
#define PB P8
#endif
#define ASCON_HASH_BYTES 32
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20
#define IV ASCON_80PQ_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 6
#define PB P6
#endif
#define ASCON_128_IV WORD_T(0x8021000008220000ull)
#define ASCON_128A_IV WORD_T(0x8822000000200000ull)
#define ASCON_80PQ_IV WORD_T(0xc021000008220000ull)
#define ASCON_HASH_IV WORD_T(0x0020000008020010ull)
#define ASCON_XOF_IV WORD_T(0x0020000008020000ull)
#define ASCON_HASH_IV0 WORD_T(0xf9afb5c6a540dbc7ull)
#define ASCON_HASH_IV1 WORD_T(0xbd2493011445a340ull)
#define ASCON_HASH_IV2 WORD_T(0xcb9ba8b5604d4fc8ull)
#define ASCON_HASH_IV3 WORD_T(0x12a4eede94514c98ull)
#define ASCON_HASH_IV4 WORD_T(0x4bca84c06339f398ull)
#define ASCON_HASHA_IV0 WORD_T(0x0108e46d1b16eb02ull)
#define ASCON_HASHA_IV1 WORD_T(0x5b9b8efdd29083f3ull)
#define ASCON_HASHA_IV2 WORD_T(0x7ad665622891ae4aull)
#define ASCON_HASHA_IV3 WORD_T(0x9dc27156ee3bfc7full)
#define ASCON_HASHA_IV4 WORD_T(0xc61d5fa916801633ull)
#define ASCON_XOF_IV0 WORD_T(0xc75782817e351ae6ull)
#define ASCON_XOF_IV1 WORD_T(0x70045f441d238220ull)
#define ASCON_XOF_IV2 WORD_T(0x5dd5ab52a13e3f04ull)
#define ASCON_XOF_IV3 WORD_T(0x3e378142c30c1db2ull)
#define ASCON_XOF_IV4 WORD_T(0x3735189db624d656ull)
#define ASCON_XOFA_IV0 WORD_T(0x0846d7a5a4b87d44ull)
#define ASCON_XOFA_IV1 WORD_T(0xaa6f1005b3a2dbf4ull)
#define ASCON_XOFA_IV2 WORD_T(0xdc451146f713e811ull)
#define ASCON_XOFA_IV3 WORD_T(0x468cb2532839e30dull)
#define ASCON_XOFA_IV4 WORD_T(0xeb2d429709e96977ull)
/* Index of the first round constant to use when running n of the 12 rounds.
   The argument is parenthesized so that expression arguments such as
   START(a - b) expand to 12 - (a - b) rather than 12 - a - b. */
#define START(n) (12 - (n))
#if ASCON_UNROLL_LOOPS
__forceinline void P12ROUNDS(state_t* s) {
ROUND(s, 0xc, 0xc);
ROUND(s, 0x9, 0xc);
ROUND(s, 0xc, 0x9);
ROUND(s, 0x9, 0x9);
ROUND(s, 0x6, 0xc);
ROUND(s, 0x3, 0xc);
ROUND(s, 0x6, 0x9);
ROUND(s, 0x3, 0x9);
ROUND(s, 0xc, 0x6);
ROUND(s, 0x9, 0x6);
ROUND(s, 0xc, 0x3);
ROUND(s, 0x9, 0x3);
/* Pack a round-constant pair into the value handed to WORD_T: o goes into the
   upper 32 bits, e into the lower 32 bits. Both arguments are parenthesized so
   the cast and shift apply to the whole argument expression. */
#define RC(e, o) WORD_T((uint64_t)(o) << 32 | (e))
/* Apply 12 rounds to the state s, fully unrolled: one ROUND call per
   round-constant pair, each pair packed into a word_t by RC. */
forceinline void P12ROUNDS(state_t* s) {
ROUND(s, RC(0xc, 0xc));
ROUND(s, RC(0x9, 0xc));
ROUND(s, RC(0xc, 0x9));
ROUND(s, RC(0x9, 0x9));
ROUND(s, RC(0x6, 0xc));
ROUND(s, RC(0x3, 0xc));
ROUND(s, RC(0x6, 0x9));
ROUND(s, RC(0x3, 0x9));
ROUND(s, RC(0xc, 0x6));
ROUND(s, RC(0x9, 0x6));
ROUND(s, RC(0xc, 0x3));
ROUND(s, RC(0x9, 0x3));
}
__forceinline void P8ROUNDS(state_t* s) {
ROUND(s, 0x6, 0xc);
ROUND(s, 0x3, 0xc);
ROUND(s, 0x6, 0x9);
ROUND(s, 0x3, 0x9);
ROUND(s, 0xc, 0x6);
ROUND(s, 0x9, 0x6);
ROUND(s, 0xc, 0x3);
ROUND(s, 0x9, 0x3);
/* Apply 8 rounds to the state s, fully unrolled. The constant pairs are the
   same as the last eight ROUND calls of P12ROUNDS, in the same order. */
forceinline void P8ROUNDS(state_t* s) {
ROUND(s, RC(0x6, 0xc));
ROUND(s, RC(0x3, 0xc));
ROUND(s, RC(0x6, 0x9));
ROUND(s, RC(0x3, 0x9));
ROUND(s, RC(0xc, 0x6));
ROUND(s, RC(0x9, 0x6));
ROUND(s, RC(0xc, 0x3));
ROUND(s, RC(0x9, 0x3));
}
__forceinline void P6ROUNDS(state_t* s) {
ROUND(s, 0x6, 0x9);
ROUND(s, 0x3, 0x9);
ROUND(s, 0xc, 0x6);
ROUND(s, 0x9, 0x6);
ROUND(s, 0xc, 0x3);
ROUND(s, 0x9, 0x3);
/* Apply 6 rounds to the state s, fully unrolled. The constant pairs are the
   same as the last six ROUND calls of P12ROUNDS, in the same order. */
forceinline void P6ROUNDS(state_t* s) {
ROUND(s, RC(0x6, 0x9));
ROUND(s, RC(0x3, 0x9));
ROUND(s, RC(0xc, 0x6));
ROUND(s, RC(0x9, 0x6));
ROUND(s, RC(0xc, 0x3));
ROUND(s, RC(0x9, 0x3));
}
#else /* !ASCON_UNROLL_LOOPS */
extern const uint8_t constants[][2];
__forceinline void P12ROUNDS(state_t* s) {
for (int i = START(12); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
/* Looped permutation: run ROUND on s once for every entry of the `constants`
   table from index START(nr) up to (but not including) index 12. */
forceinline void PROUNDS(state_t* s, int nr) {
  int round = START(nr);
  while (round < 12) {
    ROUND(s, RC(constants[round][0], constants[round][1]));
    ++round;
  }
}
__forceinline void P8ROUNDS(state_t* s) {
for (int i = START(8); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
}
#if ASCON_INLINE_PERM && ASCON_UNROLL_LOOPS
__forceinline void P6ROUNDS(state_t* s) {
for (int i = START(6); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
/* Inline permutation dispatcher: selects the unrolled 12-, 8- or 6-round
   routine according to nr. Any other nr leaves s untouched. */
forceinline void P(state_t* s, int nr) {
  switch (nr) {
    case 12:
      P12ROUNDS(s);
      break;
    case 8:
      P8ROUNDS(s);
      break;
    case 6:
      P6ROUNDS(s);
      break;
    default:
      break;
  }
}
#endif
#if ASCON_INLINE_PERM
__forceinline void P12(state_t* s) {
printstate(" permutation input", s);
P12ROUNDS(s);
}
#elif !ASCON_INLINE_PERM && ASCON_UNROLL_LOOPS
__forceinline void P8(state_t* s) {
printstate(" permutation input", s);
P8ROUNDS(s);
}
__forceinline void P6(state_t* s) {
printstate(" permutation input", s);
P6ROUNDS(s);
}
void P12(state_t* s);
void P8(state_t* s);
void P6(state_t* s);
__forceinline void P(state_t* s, int i) {
if (i == 12) P12(s);
if (i == 8) P8(s);
if (i == 6) P6(s);
/* Permutation dispatcher: forwards to P12, P8 or P6 according to nr.
   Any other nr leaves s untouched. */
forceinline void P(state_t* s, int nr) {
  switch (nr) {
    case 12:
      P12(s);
      break;
    case 8:
      P8(s);
      break;
    case 6:
      P6(s);
      break;
    default:
      break;
  }
}
#elif ASCON_SINGLE_PERM
#elif ASCON_INLINE_PERM && !ASCON_UNROLL_LOOPS
#define P12(s) P(s, 12)
#define P8(s) P(s, 8)
#define P6(s) P(s, 6)
forceinline void P(state_t* s, int nr) { PROUNDS(s, nr); }
void P(state_t* s, uint8_t rounds);
#else /* !ASCON_INLINE_PERM && !ASCON_UNROLL_LOOPS */
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */
void P12(state_t* s);
void P8(state_t* s);
void P6(state_t* s);
__forceinline void P(state_t* s, int i) {
if (i == 12) P12(s);
if (i == 8) P8(s);
if (i == 6) P6(s);
}
void P(state_t* s, int nr);
#endif
......
#ifndef PRINTSTATE_H_
#define PRINTSTATE_H_
#ifdef NDEBUG
#ifdef ASCON_PRINTSTATE
#define printword(text, w)
#define printstate(text, s)
#include "ascon.h"
#include "word.h"
#else
void printword(const char* text, const word_t x);
void printstate(const char* text, const state_t* s);
#include <inttypes.h>
#include <stdio.h>
#else
#include "ascon.h"
#include "word.h"
#define printword(text, w) \
do { \
} while (0)
__forceinline void printword(const char* text, const word_t x) {
printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x));
}
__forceinline void printstate(const char* text, const state_t* s) {
printf("%s:\n", text);
printword(" x0", s->x0);
printword(" x1", s->x1);
printword(" x2", s->x2);
printword(" x3", s->x3);
printword(" x4", s->x4);
}
#define printstate(text, s) \
do { \
} while (0)
#endif
......
......@@ -4,13 +4,13 @@
#include "ascon.h"
#include "printstate.h"
__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) {
/* Set the three key words pointed to by K0, K1 and K2 to WORD_T(0). */
forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) {
  const word_t zero = WORD_T(0);
  *K0 = zero;
  *K1 = zero;
  *K2 = zero;
}
__forceinline void PINIT(state_t* s) {
forceinline void PINIT(state_t* s) {
s->x0 = WORD_T(0);
s->x1 = WORD_T(0);
s->x2 = WORD_T(0);
......@@ -18,34 +18,34 @@ __forceinline void PINIT(state_t* s) {
s->x4 = WORD_T(0);
}
__forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) {
word_t tmp, C = {.o = C_o, .e = C_e};
forceinline void ROUND(state_t* s, word_t C) {
word_t xtemp;
/* round constant */
s->x2 = XOR(s->x2, C);
/* s-box layer */
s->x0 = XOR(s->x0, s->x4);
s->x4 = XOR(s->x4, s->x3);
s->x2 = XOR(s->x2, s->x1);
tmp = AND(s->x0, NOT(s->x4));
xtemp = AND(s->x0, NOT(s->x4));
s->x0 = XOR(s->x0, AND(s->x2, NOT(s->x1)));
s->x2 = XOR(s->x2, AND(s->x4, NOT(s->x3)));
s->x4 = XOR(s->x4, AND(s->x1, NOT(s->x0)));
s->x1 = XOR(s->x1, AND(s->x3, NOT(s->x2)));
s->x3 = XOR(s->x3, tmp);
s->x3 = XOR(s->x3, xtemp);
s->x1 = XOR(s->x1, s->x0);
s->x3 = XOR(s->x3, s->x2);
s->x0 = XOR(s->x0, s->x4);
/* linear layer */
tmp = XOR(s->x0, ROR64(s->x0, 28 - 19));
s->x0 = XOR(s->x0, ROR64(tmp, 19));
tmp = XOR(s->x1, ROR64(s->x1, 61 - 39));
s->x1 = XOR(s->x1, ROR64(tmp, 39));
tmp = XOR(s->x2, ROR64(s->x2, 6 - 1));
s->x2 = XOR(s->x2, ROR64(tmp, 1));
tmp = XOR(s->x3, ROR64(s->x3, 17 - 10));
s->x3 = XOR(s->x3, ROR64(tmp, 10));
tmp = XOR(s->x4, ROR64(s->x4, 41 - 7));
s->x4 = XOR(s->x4, ROR64(tmp, 7));
xtemp = XOR(s->x0, ROR(s->x0, 28 - 19));
s->x0 = XOR(s->x0, ROR(xtemp, 19));
xtemp = XOR(s->x1, ROR(s->x1, 61 - 39));
s->x1 = XOR(s->x1, ROR(xtemp, 39));
xtemp = XOR(s->x2, ROR(s->x2, 6 - 1));
s->x2 = XOR(s->x2, ROR(xtemp, 1));
xtemp = XOR(s->x3, ROR(s->x3, 17 - 10));
s->x3 = XOR(s->x3, ROR(xtemp, 10));
xtemp = XOR(s->x4, ROR(s->x4, 41 - 7));
s->x4 = XOR(s->x4, ROR(xtemp, 7));
s->x2 = NOT(s->x2);
printstate(" round output", s);
}
......
......@@ -4,6 +4,7 @@
#include <stdint.h>
#include "endian.h"
#include "forceinline.h"
#include "interleave.h"
typedef struct {
......@@ -11,102 +12,92 @@ typedef struct {
uint32_t o;
} word_t;
__forceinline uint32_t ROR32(uint32_t x, int n) {
/* Rotate the 32-bit value x right by n bits. n == 0 is returned unchanged
   because the general formula would shift left by the full 32-bit width. */
forceinline uint32_t ROR32(uint32_t x, int n) {
  if (n == 0) {
    return x;
  }
  const uint32_t low_part = x >> n;
  const uint32_t wrapped = x << (32 - n);
  return low_part | wrapped;
}
__forceinline word_t ROR64(word_t x, int n) {
/* Rotate a word_t right by n, operating on its two 32-bit halves e and o.
   For even n each half is rotated by n / 2 in place; for odd n the halves
   swap roles: the new e is o rotated by (n - 1) / 2 and the new o is e
   rotated by (n + 1) / 2. */
forceinline word_t ROR(word_t x, int n) {
  word_t r;
  if (n % 2) {
    r.e = ROR32(x.o, (n - 1) / 2);
    r.o = ROR32(x.e, (n + 1) / 2);
  } else {
    r.e = ROR32(x.e, n / 2);
    r.o = ROR32(x.o, n / 2);
  }
  return r;
}
__forceinline word_t WORD_T(uint64_t x) {
return (word_t){.o = x >> 32, .e = x};
}
forceinline word_t WORD_T(uint64_t x) { return (word_t){.o = x >> 32, .e = x}; }
__forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; }
forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; }
__forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(deinterleave32(x)); }
forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(deinterleave32(x)); }
__forceinline uint64_t WORDTOU64(word_t w) { return interleave32(UINT64_T(w)); }
forceinline uint64_t WORDTOU64(word_t w) { return interleave32(UINT64_T(w)); }
__forceinline word_t NOT(word_t a) {
/* Bitwise complement of both halves (e and o) of a. */
forceinline word_t NOT(word_t a) {
  word_t r = a;
  r.e = ~a.e;
  r.o = ~a.o;
  return r;
}
__forceinline word_t XOR(word_t a, word_t b) {
/* Half-wise exclusive OR of a and b (e with e, o with o). */
forceinline word_t XOR(word_t a, word_t b) {
  word_t r = a;
  r.e = a.e ^ b.e;
  r.o = a.o ^ b.o;
  return r;
}
__forceinline word_t AND(word_t a, word_t b) {
/* Half-wise bitwise AND of a and b (e with e, o with o). */
forceinline word_t AND(word_t a, word_t b) {
  word_t r = a;
  r.e = a.e & b.e;
  r.o = a.o & b.o;
  return r;
}
__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) {
forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) {
word_t r;
r.o = lo2hi.o << 16 | hi2lo.o >> 16;
r.e = lo2hi.e << 16 | hi2lo.e >> 16;
r.o = lo2hi.o << 16 | hi2lo.o >> 16;
return r;
}
__forceinline uint8_t NOTZERO(word_t a, word_t b) {
forceinline int NOTZERO(word_t a, word_t b) {
uint32_t result = a.e | a.o | b.e | b.o;
result |= result >> 16;
result |= result >> 8;
return (uint8_t)result;
return ((((int)(result & 0xff) - 1) >> 8) & 1) - 1;
}
__forceinline word_t PAD(int i) {
return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32);
forceinline word_t PAD(int i) {
return WORD_T((uint64_t)(0x8ul << (28 - 4 * i)) << 32);
}
__forceinline word_t CLEAR(word_t w, int n) {
forceinline word_t CLEAR(word_t w, int n) {
/* undefined for n == 0 */
uint32_t mask = 0x0fffffff >> (n * 4 - 4);
return AND(w, WORD_T((uint64_t)mask << 32 | mask));
w.e &= mask;
w.o &= mask;
return w;
}
__forceinline uint64_t MASK(int n) {
/* Return a 64-bit value with the low 8 * n bits set and all others clear. */
forceinline uint64_t MASK(int n) {
  /* undefined for n == 0 */
  const int dropped_bits = 64 - 8 * n;
  return ~(uint64_t)0 >> dropped_bits;
}
__forceinline word_t LOAD64(const uint8_t* bytes) {
uint64_t x = *(uint64_t*)bytes;
return U64TOWORD(U64BIG(x));
}
__forceinline void STORE64(uint8_t* bytes, word_t w) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes = U64BIG(x);
}
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
/* Read a 64-bit value from `bytes`, keep only the bits selected by MASK(n),
   convert it with U64BIG and return it in word_t form via U64TOWORD.
   n == 0 is not supported because MASK is undefined for it. */
forceinline word_t LOAD(const uint8_t* bytes, int n) {
/* NOTE(review): this dereferences a full uint64_t regardless of n, so 8 bytes
   are read; the cast also assumes `bytes` may be accessed as uint64_t
   (alignment / aliasing) -- confirm for the target platform. */
uint64_t x = *(uint64_t*)bytes & MASK(n);
return U64TOWORD(U64BIG(x));
}
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
/* Write w to `bytes` as a read-modify-write of one 64-bit value: the bits
   selected by MASK(n) are cleared, then U64BIG(WORDTOU64(w)) is ORed in.
   n == 0 is not supported because MASK is undefined for it. */
forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
/* NOTE(review): both statements access a full uint64_t at `bytes`, so 8 bytes
   are read and written; confirm alignment / aliasing for the target. */
*(uint64_t*)bytes &= ~MASK(n);
/* NOTE(review): x is not masked here; presumably callers pass a w whose bits
   outside the first n bytes are already zero -- verify against callers. */
*(uint64_t*)bytes |= U64BIG(x);
}
__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) {
/* Load n bytes (0 <= n <= 8) from `bytes` into the most significant bytes of
   a 64-bit value (bytes[0] ends up in bits 63..56), zero-filling the rest,
   and return it in word_t form via U64TOWORD.
   The value is assembled with shifts instead of by indexing the object
   representation of the integer, so the result does not depend on the byte
   order of the host; on little-endian hosts it is unchanged. */
forceinline word_t LOADBYTES(const uint8_t* bytes, int n) {
  uint64_t x = 0;
  for (int i = 0; i < n; ++i) {
    x |= (uint64_t)bytes[i] << (56 - 8 * i);
  }
  return U64TOWORD(x);
}
__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) {
/* Store the n most significant bytes (0 <= n <= 8) of WORDTOU64(w) to
   `bytes`, most significant byte first (bits 63..56 go to bytes[0]).
   The bytes are extracted with shifts instead of by indexing the object
   representation of the integer, so the result does not depend on the byte
   order of the host; on little-endian hosts it is unchanged. */
forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) {
  const uint64_t x = WORDTOU64(w);
  for (int i = 0; i < n; ++i) {
    bytes[i] = (uint8_t)(x >> (56 - 8 * i));
  }
}
......
......@@ -3,49 +3,43 @@
#include "permutations.h"
#include "printstate.h"
void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len,
uint8_t mode);
void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen,
const uint8_t* ad, uint64_t adlen, const uint8_t* npub,
const uint8_t* k, uint8_t mode) {
word_t K0, K1, K2;
/* load key */
forceinline void ascon_loadkey(word_t* K0, word_t* K1, word_t* K2,
const uint8_t* k) {
KINIT(K0, K1, K2);
if (CRYPTO_KEYBYTES == 16) {
*K1 = XOR(*K1, LOAD(k, 8));
*K2 = XOR(*K2, LOAD(k + 8, 8));
}
if (CRYPTO_KEYBYTES == 20) {
K0 = KEYROT(WORD_T(0), LOAD(k, 4));
k += 4;
*K0 = XOR(*K0, KEYROT(WORD_T(0), LOADBYTES(k, 4)));
*K1 = XOR(*K1, LOADBYTES(k + 4, 8));
*K2 = XOR(*K2, LOADBYTES(k + 12, 8));
}
K1 = LOAD64(k);
K2 = LOAD64(k + 8);
/* initialization */
s->x0 = IV;
}
forceinline void ascon_aeadinit(state_t* s, const uint8_t* npub, word_t K0,
word_t K1, word_t K2) {
if (CRYPTO_KEYBYTES == 16 && ASCON_AEAD_RATE == 8) s->x0 = ASCON_128_IV;
if (CRYPTO_KEYBYTES == 16 && ASCON_AEAD_RATE == 16) s->x0 = ASCON_128A_IV;
if (CRYPTO_KEYBYTES == 20) s->x0 = ASCON_80PQ_IV;
if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, K0);
s->x1 = K1;
s->x2 = K2;
s->x3 = LOAD64(npub);
s->x4 = LOAD64(npub + 8);
P12(s);
s->x3 = LOAD(npub, 8);
s->x4 = LOAD(npub + 8, 8);
P(s, 12);
if (CRYPTO_KEYBYTES == 20) s->x2 = XOR(s->x2, K0);
s->x3 = XOR(s->x3, K1);
s->x4 = XOR(s->x4, K2);
printstate("initialization", s);
/* process associated data */
if (adlen) {
process_data(s, (void*)0, ad, adlen, ASCON_ABSORB);
PB(s);
}
s->x4 = XOR(s->x4, WORD_T(1));
printstate("process associated data", s);
/* process plaintext/ciphertext */
process_data(s, out, in, tlen, mode);
if (mode == ASCON_ENCRYPT) printstate("process plaintext", s);
if (mode == ASCON_DECRYPT) printstate("process ciphertext", s);
/* finalization */
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) {
}
forceinline void ascon_final(state_t* s, word_t K0, word_t K1, word_t K2) {
if (CRYPTO_KEYBYTES == 16 && ASCON_AEAD_RATE == 8) {
s->x1 = XOR(s->x1, K1);
s->x2 = XOR(s->x2, K2);
}
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) {
if (CRYPTO_KEYBYTES == 16 && ASCON_AEAD_RATE == 16) {
s->x2 = XOR(s->x2, K1);
s->x3 = XOR(s->x3, K2);
}
......@@ -54,8 +48,32 @@ void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen,
s->x2 = XOR(s->x2, KEYROT(K1, K2));
s->x3 = XOR(s->x3, KEYROT(K2, WORD_T(0)));
}
P12(s);
P(s, 12);
s->x3 = XOR(s->x3, K1);
s->x4 = XOR(s->x4, K2);
printstate("finalization", s);
}
/* Run one complete AEAD pass over the state s.
 *
 * Steps, in order: load the key from k, initialize s from npub and the key,
 * absorb the associated data (only when adlen is non-zero, followed by the
 * intermediate permutation), XOR 1 into x4 as domain separation, process
 * tlen bytes from `in` into `out` according to `mode` (ASCON_ENCRYPT or
 * ASCON_DECRYPT), then finalize with the key.
 */
void ascon_aead(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen,
                const uint8_t* ad, uint64_t adlen, const uint8_t* npub,
                const uint8_t* k, uint8_t mode) {
  /* rounds of the intermediate permutation: 6 at rate 8, otherwise 8 */
  const int pb_rounds = (ASCON_AEAD_RATE == 8) ? 6 : 8;
  word_t key0, key1, key2;

  ascon_loadkey(&key0, &key1, &key2, k);

  /* initialization */
  ascon_aeadinit(s, npub, key0, key1, key2);

  /* associated data is absorbed only when present */
  if (adlen) {
    ascon_update(s, (void*)0, ad, adlen, ASCON_ABSORB);
    P(s, pb_rounds);
  }

  /* domain separation between associated data and message */
  s->x4 = XOR(s->x4, WORD_T(1));
  printstate("process associated data", s);

  /* plaintext or ciphertext, depending on mode */
  ascon_update(s, out, in, tlen, mode);
  if (mode == ASCON_ENCRYPT) {
    printstate("process plaintext", s);
  } else if (mode == ASCON_DECRYPT) {
    printstate("process ciphertext", s);
  }

  /* finalization */
  ascon_final(s, key0, key1, key2);
}
#define CRYPTO_VERSION "1.2.5"
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#define ASCON_RATE 16
#define ASCON_AEAD_RATE 16
......@@ -3,7 +3,6 @@
#include <stdint.h>
#include "config.h"
#include "word.h"
typedef struct {
......@@ -13,13 +12,14 @@ typedef struct {
#define ASCON_ABSORB 0x1
#define ASCON_SQUEEZE 0x2
#define ASCON_INSERT 0x4
#define ASCON_HASH 0x8
#define ASCON_ENCRYPT (ASCON_ABSORB | ASCON_SQUEEZE)
#define ASCON_DECRYPT (ASCON_ABSORB | ASCON_SQUEEZE | ASCON_INSERT)
void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len,
void ascon_update(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len,
uint8_t mode);
void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen,
void ascon_aead(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen,
const uint8_t* ad, uint64_t adlen, const uint8_t* npub,
const uint8_t* k, uint8_t mode);
......
#ifndef CONFIG_H_
#define CONFIG_H_
/* inline the Ascon mode */
/* inline the ascon mode */
#ifndef ASCON_INLINE_MODE
#define ASCON_INLINE_MODE 1
#define ASCON_INLINE_MODE 0
#endif
/* inline the Ascon permutations */
/* inline all permutations */
#ifndef ASCON_INLINE_PERM
#define ASCON_INLINE_PERM 0
#endif
/* single function for all permutations */
#ifndef ASCON_SINGLE_PERM
#define ASCON_SINGLE_PERM 1
#endif
/* unroll the permutation loops */
/* unroll permutation loops */
#ifndef ASCON_UNROLL_LOOPS
#define ASCON_UNROLL_LOOPS 1
#endif
/* make sure __forceinline is supported */
#ifndef __forceinline
#define __forceinline inline __attribute__((always_inline))
#define ASCON_UNROLL_LOOPS 0
#endif
#endif /* CONFIG_H_ */
#include "api.h"
#include "ascon.h"
#include "crypto_aead.h"
#include "permutations.h"
#include "printstate.h"
void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen,
void ascon_aead(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen,
const uint8_t* ad, uint64_t adlen, const uint8_t* npub,
const uint8_t* k, uint8_t mode);
int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec,
const uint8_t* c, uint64_t clen, const uint8_t* ad,
uint64_t adlen, const uint8_t* npub, const uint8_t* k) {
if (clen < CRYPTO_ABYTES) {
*mlen = 0;
return -1;
}
int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen,
unsigned char* nsec, const unsigned char* c,
unsigned long long clen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k) {
state_t s;
(void)nsec;
if (clen < CRYPTO_ABYTES) return -1;
/* set plaintext size */
*mlen = clen - CRYPTO_ABYTES;
/* ascon decryption */
ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DECRYPT);
ascon_aead(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DECRYPT);
/* verify tag (should be constant time, check compiler output) */
s.x3 = XOR(s.x3, LOADBYTES(c + *mlen, 8));
s.x4 = XOR(s.x4, LOADBYTES(c + *mlen + 8, 8));
if (NOTZERO(s.x3, s.x4)) {
*mlen = 0;
return -1;
}
return 0;
return NOTZERO(s.x3, s.x4);
}
#include "api.h"
#include "ascon.h"
#include "crypto_aead.h"
#include "permutations.h"
#include "printstate.h"
void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen,
void ascon_aead(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen,
const uint8_t* ad, uint64_t adlen, const uint8_t* npub,
const uint8_t* k, uint8_t mode);
int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m,
uint64_t mlen, const uint8_t* ad, uint64_t adlen,
const uint8_t* nsec, const uint8_t* npub,
const uint8_t* k) {
int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen,
const unsigned char* m, unsigned long long mlen,
const unsigned char* ad, unsigned long long adlen,
const unsigned char* nsec, const unsigned char* npub,
const unsigned char* k) {
state_t s;
(void)nsec;
/* set ciphertext size */
*clen = mlen + CRYPTO_ABYTES;
/* ascon encryption */
ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENCRYPT);
ascon_aead(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENCRYPT);
/* set tag */
STOREBYTES(c + mlen, s.x3, 8);
STOREBYTES(c + mlen + 8, s.x4, 8);
......
......@@ -4,7 +4,7 @@
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
/* macros for big endian machines */
#ifndef NDEBUG
#ifdef PRAGMA_ENDIAN
#pragma message("Using macros for big endian machines")
#endif
#define U64BIG(x) (x)
......@@ -15,7 +15,7 @@
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
/* macros for little endian machines */
#ifndef NDEBUG
#ifdef PRAGMA_ENDIAN
#pragma message("Using macros for little endian machines")
#endif
#define U64BIG(x) \
......
......@@ -3,6 +3,8 @@
#include <stdint.h>
#include "forceinline.h"
uint64_t deinterleave32(uint64_t in);
uint64_t interleave32(uint64_t in);
......
#include "permutations.h"
#include "round.h"
#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM
#if !ASCON_UNROLL_LOOPS
const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9},
{0x6, 0xc}, {0x3, 0xc}, {0x6, 0x9}, {0x3, 0x9},
......@@ -10,35 +8,16 @@ const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9},
#endif
#if ASCON_INLINE_PERM
#elif ASCON_SINGLE_PERM
void P(state_t* s, uint8_t rounds) {
printstate(" permutation input", s);
for (int i = START(rounds); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
}
#if !ASCON_INLINE_PERM && ASCON_UNROLL_LOOPS
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */
void P12(state_t* s) { P12ROUNDS(s); }
void P8(state_t* s) { P8ROUNDS(s); }
void P6(state_t* s) { P6ROUNDS(s); }
void P12(state_t* s) {
printstate(" permutation input", s);
P12ROUNDS(s);
}
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16
void P8(state_t* s) {
printstate(" permutation input", s);
P8ROUNDS(s);
}
#endif
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8
void P6(state_t* s) {
printstate(" permutation input", s);
P6ROUNDS(s);
}
#endif
#if !ASCON_INLINE_PERM && !ASCON_UNROLL_LOOPS
void P(state_t* s, int nr) { PROUNDS(s, nr); }
#endif
......@@ -5,6 +5,7 @@
#include "api.h"
#include "ascon.h"
#include "config.h"
#include "printstate.h"
#include "round.h"
......@@ -14,154 +15,124 @@
#define ASCON_128_RATE 8
#define ASCON_128A_RATE 16
#define ASCON_HASH_RATE 8
#define ASCON_128_PA_ROUNDS 12
#define ASCON_128_PB_ROUNDS 6
#define ASCON_128A_PA_ROUNDS 12
#define ASCON_128A_PB_ROUNDS 8
#define ASCON_HASH_BYTES 32
#define ASCON_HASH_PA_ROUNDS 12
#define ASCON_HASH_PB_ROUNDS 12
#define ASCON_128_IV WORD_T(0x8021000008220000)
#define ASCON_128A_IV WORD_T(0x8822000000200000)
#define ASCON_80PQ_IV WORD_T(0xc021000008220000)
#define ASCON_HASH_IV WORD_T(0x0020000008020010)
#define ASCON_XOF_IV WORD_T(0x0020000008020000)
#define ASCON_HASH_IV0 WORD_T(0xf9afb5c6a540dbc7)
#define ASCON_HASH_IV1 WORD_T(0xbd2493011445a340)
#define ASCON_HASH_IV2 WORD_T(0xcb9ba8b5604d4fc8)
#define ASCON_HASH_IV3 WORD_T(0x12a4eede94514c98)
#define ASCON_HASH_IV4 WORD_T(0x4bca84c06339f398)
#define ASCON_XOF_IV0 WORD_T(0xc75782817e351ae6)
#define ASCON_XOF_IV1 WORD_T(0x70045f441d238220)
#define ASCON_XOF_IV2 WORD_T(0x5dd5ab52a13e3f04)
#define ASCON_XOF_IV3 WORD_T(0x3e378142c30c1db2)
#define ASCON_XOF_IV4 WORD_T(0x3735189db624d656)
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16
#define IV ASCON_128_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 6
#define PB P6
#endif
#define ASCON_HASHA_PA_ROUNDS 12
#define ASCON_HASHA_PB_ROUNDS 8
#if ASCON_RATE == 16
#define IV ASCON_128A_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 8
#define PB P8
#endif
#define ASCON_HASH_BYTES 32
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20
#define IV ASCON_80PQ_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 6
#define PB P6
#endif
#define ASCON_128_IV WORD_T(0x8021000008220000ull)
#define ASCON_128A_IV WORD_T(0x8822000000200000ull)
#define ASCON_80PQ_IV WORD_T(0xc021000008220000ull)
#define ASCON_HASH_IV WORD_T(0x0020000008020010ull)
#define ASCON_XOF_IV WORD_T(0x0020000008020000ull)
#define ASCON_HASH_IV0 WORD_T(0xf9afb5c6a540dbc7ull)
#define ASCON_HASH_IV1 WORD_T(0xbd2493011445a340ull)
#define ASCON_HASH_IV2 WORD_T(0xcb9ba8b5604d4fc8ull)
#define ASCON_HASH_IV3 WORD_T(0x12a4eede94514c98ull)
#define ASCON_HASH_IV4 WORD_T(0x4bca84c06339f398ull)
#define ASCON_HASHA_IV0 WORD_T(0x0108e46d1b16eb02ull)
#define ASCON_HASHA_IV1 WORD_T(0x5b9b8efdd29083f3ull)
#define ASCON_HASHA_IV2 WORD_T(0x7ad665622891ae4aull)
#define ASCON_HASHA_IV3 WORD_T(0x9dc27156ee3bfc7full)
#define ASCON_HASHA_IV4 WORD_T(0xc61d5fa916801633ull)
#define ASCON_XOF_IV0 WORD_T(0xc75782817e351ae6ull)
#define ASCON_XOF_IV1 WORD_T(0x70045f441d238220ull)
#define ASCON_XOF_IV2 WORD_T(0x5dd5ab52a13e3f04ull)
#define ASCON_XOF_IV3 WORD_T(0x3e378142c30c1db2ull)
#define ASCON_XOF_IV4 WORD_T(0x3735189db624d656ull)
#define ASCON_XOFA_IV0 WORD_T(0x0846d7a5a4b87d44ull)
#define ASCON_XOFA_IV1 WORD_T(0xaa6f1005b3a2dbf4ull)
#define ASCON_XOFA_IV2 WORD_T(0xdc451146f713e811ull)
#define ASCON_XOFA_IV3 WORD_T(0x468cb2532839e30dull)
#define ASCON_XOFA_IV4 WORD_T(0xeb2d429709e96977ull)
/* index of the first round constant used by an n-round permutation (the
   round loops run from START(n) up to, but not including, 12); the argument
   is parenthesised so that expressions such as START(a + b) expand correctly */
#define START(n) (12 - (n))
#if ASCON_UNROLL_LOOPS
__forceinline void P12ROUNDS(state_t* s) {
ROUND(s, 0xc, 0xc);
ROUND(s, 0x9, 0xc);
ROUND(s, 0xc, 0x9);
ROUND(s, 0x9, 0x9);
ROUND(s, 0x6, 0xc);
ROUND(s, 0x3, 0xc);
ROUND(s, 0x6, 0x9);
ROUND(s, 0x3, 0x9);
ROUND(s, 0xc, 0x6);
ROUND(s, 0x9, 0x6);
ROUND(s, 0xc, 0x3);
ROUND(s, 0x9, 0x3);
/* build a round-constant word with o in the upper 32 bits and e in the lower
   32 bits; both arguments are parenthesised so that the cast and the shift
   apply to the whole argument expression */
#define RC(e, o) WORD_T((uint64_t)(o) << 32 | (e))
/* Apply twelve rounds to the state s, fully unrolled: one ROUND call per
   round, each with its round constant built by RC(e, o). */
forceinline void P12ROUNDS(state_t* s) {
ROUND(s, RC(0xc, 0xc));
ROUND(s, RC(0x9, 0xc));
ROUND(s, RC(0xc, 0x9));
ROUND(s, RC(0x9, 0x9));
ROUND(s, RC(0x6, 0xc));
ROUND(s, RC(0x3, 0xc));
ROUND(s, RC(0x6, 0x9));
ROUND(s, RC(0x3, 0x9));
ROUND(s, RC(0xc, 0x6));
ROUND(s, RC(0x9, 0x6));
ROUND(s, RC(0xc, 0x3));
ROUND(s, RC(0x9, 0x3));
}
__forceinline void P8ROUNDS(state_t* s) {
ROUND(s, 0x6, 0xc);
ROUND(s, 0x3, 0xc);
ROUND(s, 0x6, 0x9);
ROUND(s, 0x3, 0x9);
ROUND(s, 0xc, 0x6);
ROUND(s, 0x9, 0x6);
ROUND(s, 0xc, 0x3);
ROUND(s, 0x9, 0x3);
/* Apply eight rounds to the state s, fully unrolled. The constants are the
   same as, and in the same order as, the last eight rounds of P12ROUNDS. */
forceinline void P8ROUNDS(state_t* s) {
ROUND(s, RC(0x6, 0xc));
ROUND(s, RC(0x3, 0xc));
ROUND(s, RC(0x6, 0x9));
ROUND(s, RC(0x3, 0x9));
ROUND(s, RC(0xc, 0x6));
ROUND(s, RC(0x9, 0x6));
ROUND(s, RC(0xc, 0x3));
ROUND(s, RC(0x9, 0x3));
}
__forceinline void P6ROUNDS(state_t* s) {
ROUND(s, 0x6, 0x9);
ROUND(s, 0x3, 0x9);
ROUND(s, 0xc, 0x6);
ROUND(s, 0x9, 0x6);
ROUND(s, 0xc, 0x3);
ROUND(s, 0x9, 0x3);
/* Apply six rounds to the state s, fully unrolled. The constants are the
   same as, and in the same order as, the last six rounds of P12ROUNDS. */
forceinline void P6ROUNDS(state_t* s) {
ROUND(s, RC(0x6, 0x9));
ROUND(s, RC(0x3, 0x9));
ROUND(s, RC(0xc, 0x6));
ROUND(s, RC(0x9, 0x6));
ROUND(s, RC(0xc, 0x3));
ROUND(s, RC(0x9, 0x3));
}
#else /* !ASCON_UNROLL_LOOPS */
extern const uint8_t constants[][2];
__forceinline void P12ROUNDS(state_t* s) {
for (int i = START(12); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
/* Looped permutation: apply the last nr of the twelve rounds to s, reading
   each round's constant pair from the `constants` table. */
forceinline void PROUNDS(state_t* s, int nr) {
  int round = START(nr);
  while (round < 12) {
    ROUND(s, RC(constants[round][0], constants[round][1]));
    round++;
  }
}
__forceinline void P8ROUNDS(state_t* s) {
for (int i = START(8); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
}
#if ASCON_INLINE_PERM && ASCON_UNROLL_LOOPS
__forceinline void P6ROUNDS(state_t* s) {
for (int i = START(6); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
/* Dispatch to the inlined, unrolled permutation with nr rounds. Only 12, 8
   and 6 are handled; any other value leaves the state untouched. */
forceinline void P(state_t* s, int nr) {
  switch (nr) {
    case 12:
      P12ROUNDS(s);
      break;
    case 8:
      P8ROUNDS(s);
      break;
    case 6:
      P6ROUNDS(s);
      break;
    default:
      break;
  }
}
#endif
#if ASCON_INLINE_PERM
__forceinline void P12(state_t* s) {
printstate(" permutation input", s);
P12ROUNDS(s);
}
#elif !ASCON_INLINE_PERM && ASCON_UNROLL_LOOPS
__forceinline void P8(state_t* s) {
printstate(" permutation input", s);
P8ROUNDS(s);
}
__forceinline void P6(state_t* s) {
printstate(" permutation input", s);
P6ROUNDS(s);
}
void P12(state_t* s);
void P8(state_t* s);
void P6(state_t* s);
__forceinline void P(state_t* s, int i) {
if (i == 12) P12(s);
if (i == 8) P8(s);
if (i == 6) P6(s);
/* Dispatch to the out-of-line permutation function with nr rounds. Only 12,
   8 and 6 are handled; any other value leaves the state untouched. */
forceinline void P(state_t* s, int nr) {
  switch (nr) {
    case 12:
      P12(s);
      break;
    case 8:
      P8(s);
      break;
    case 6:
      P6(s);
      break;
    default:
      break;
  }
}
#elif ASCON_SINGLE_PERM
#elif ASCON_INLINE_PERM && !ASCON_UNROLL_LOOPS
#define P12(s) P(s, 12)
#define P8(s) P(s, 8)
#define P6(s) P(s, 6)
forceinline void P(state_t* s, int nr) { PROUNDS(s, nr); }
void P(state_t* s, uint8_t rounds);
#else /* !ASCON_INLINE_PERM && !ASCON_UNROLL_LOOPS */
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */
void P12(state_t* s);
void P8(state_t* s);
void P6(state_t* s);
__forceinline void P(state_t* s, int i) {
if (i == 12) P12(s);
if (i == 8) P8(s);
if (i == 6) P6(s);
}
void P(state_t* s, int nr);
#endif
......
#ifndef PRINTSTATE_H_
#define PRINTSTATE_H_
#ifdef NDEBUG
#ifdef ASCON_PRINTSTATE
#define printword(text, w)
#define printstate(text, s)
#include "ascon.h"
#include "word.h"
#else
void printword(const char* text, const word_t x);
void printstate(const char* text, const state_t* s);
#include <inttypes.h>
#include <stdio.h>
#else
#include "ascon.h"
#include "word.h"
#define printword(text, w) \
do { \
} while (0)
__forceinline void printword(const char* text, const word_t x) {
printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x));
}
__forceinline void printstate(const char* text, const state_t* s) {
printf("%s:\n", text);
printword(" x0", s->x0);
printword(" x1", s->x1);
printword(" x2", s->x2);
printword(" x3", s->x3);
printword(" x4", s->x4);
}
#define printstate(text, s) \
do { \
} while (0)
#endif
......
......@@ -4,13 +4,13 @@
#include "ascon.h"
#include "printstate.h"
__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) {
forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) {
*K0 = WORD_T(0);
*K1 = WORD_T(0);
*K2 = WORD_T(0);
}
__forceinline void PINIT(state_t* s) {
forceinline void PINIT(state_t* s) {
s->x0 = WORD_T(0);
s->x1 = WORD_T(0);
s->x2 = WORD_T(0);
......@@ -18,34 +18,34 @@ __forceinline void PINIT(state_t* s) {
s->x4 = WORD_T(0);
}
__forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) {
word_t tmp, C = {.o = C_o, .e = C_e};
forceinline void ROUND(state_t* s, word_t C) {
word_t xtemp;
/* round constant */
s->x2 = XOR(s->x2, C);
/* s-box layer */
s->x0 = XOR(s->x0, s->x4);
s->x4 = XOR(s->x4, s->x3);
s->x2 = XOR(s->x2, s->x1);
tmp = AND(s->x0, NOT(s->x4));
xtemp = AND(s->x0, NOT(s->x4));
s->x0 = XOR(s->x0, AND(s->x2, NOT(s->x1)));
s->x2 = XOR(s->x2, AND(s->x4, NOT(s->x3)));
s->x4 = XOR(s->x4, AND(s->x1, NOT(s->x0)));
s->x1 = XOR(s->x1, AND(s->x3, NOT(s->x2)));
s->x3 = XOR(s->x3, tmp);
s->x3 = XOR(s->x3, xtemp);
s->x1 = XOR(s->x1, s->x0);
s->x3 = XOR(s->x3, s->x2);
s->x0 = XOR(s->x0, s->x4);
/* linear layer */
tmp = XOR(s->x0, ROR64(s->x0, 28 - 19));
s->x0 = XOR(s->x0, ROR64(tmp, 19));
tmp = XOR(s->x1, ROR64(s->x1, 61 - 39));
s->x1 = XOR(s->x1, ROR64(tmp, 39));
tmp = XOR(s->x2, ROR64(s->x2, 6 - 1));
s->x2 = XOR(s->x2, ROR64(tmp, 1));
tmp = XOR(s->x3, ROR64(s->x3, 17 - 10));
s->x3 = XOR(s->x3, ROR64(tmp, 10));
tmp = XOR(s->x4, ROR64(s->x4, 41 - 7));
s->x4 = XOR(s->x4, ROR64(tmp, 7));
xtemp = XOR(s->x0, ROR(s->x0, 28 - 19));
s->x0 = XOR(s->x0, ROR(xtemp, 19));
xtemp = XOR(s->x1, ROR(s->x1, 61 - 39));
s->x1 = XOR(s->x1, ROR(xtemp, 39));
xtemp = XOR(s->x2, ROR(s->x2, 6 - 1));
s->x2 = XOR(s->x2, ROR(xtemp, 1));
xtemp = XOR(s->x3, ROR(s->x3, 17 - 10));
s->x3 = XOR(s->x3, ROR(xtemp, 10));
xtemp = XOR(s->x4, ROR(s->x4, 41 - 7));
s->x4 = XOR(s->x4, ROR(xtemp, 7));
s->x2 = NOT(s->x2);
printstate(" round output", s);
}
......
......@@ -5,57 +5,40 @@
void ascon_update(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len,
uint8_t mode) {
const int nr = (ASCON_RATE == 8) ? 6 : 8;
const int rate = 16;
const int nr = 8;
word_t tmp0, tmp1;
/* full blocks */
while (len >= ASCON_RATE) {
tmp0 = LOAD(in, 8);
tmp1 = LOAD(in + 8, 8);
int n = 0, n0 = 0, n1 = 0;
while (len) {
/* determine block size */
n0 = len < 8 ? len : 8;
n1 = len < 8 ? 0 : (len < 16 ? len - 8 : 8);
n = n0 + n1;
/* absorb data */
tmp0 = LOAD(in, n0);
s->x0 = XOR(s->x0, tmp0);
s->x1 = XOR(s->x1, tmp1);
if (n1) tmp1 = LOAD(in + 8, n1);
if (n1) s->x1 = XOR(s->x1, tmp1);
/* extract data */
if (mode & ASCON_SQUEEZE) {
STORE(out, s->x0, 8);
STORE(out + 8, s->x1, 8);
STORE(out, s->x0, n0);
if (n1) STORE(out + 8, s->x1, n1);
}
/* insert data */
if (mode & ASCON_INSERT) {
s->x0 = tmp0;
s->x1 = tmp1;
s->x0 = CLEAR(s->x0, n0);
s->x0 = XOR(s->x0, tmp0);
if (n1) s->x1 = CLEAR(s->x1, n1);
if (n1) s->x1 = XOR(s->x1, tmp1);
}
P(s, nr);
in += ASCON_RATE;
out += ASCON_RATE;
len -= ASCON_RATE;
/* compute permutation for full blocks */
if (n == rate) P(s, nr);
in += n;
out += n;
len -= n;
}
/* final block */
if (len) {
tmp1 = WORD_T(0);
if (len >= 8) tmp0 = LOAD(in, 8);
if (len > 8)
tmp1 = LOAD(in + 8, len - 8);
else
tmp0 = LOAD(in, len);
s->x0 = XOR(s->x0, tmp0);
s->x1 = XOR(s->x1, tmp1);
if (mode & ASCON_SQUEEZE) {
if (len >= 8) STORE(out, s->x0, 8);
if (len > 8)
STORE(out + 8, s->x1, len - 8);
else
STORE(out, s->x0, len);
}
if (mode & ASCON_INSERT) {
if (len >= 8) s->x0 = tmp0;
if (len > 8) {
s->x1 = CLEAR(s->x1, len - 8);
s->x1 = XOR(s->x1, tmp1);
} else {
s->x0 = CLEAR(s->x0, len);
s->x0 = XOR(s->x0, tmp0);
}
}
}
if (len < 8)
s->x0 = XOR(s->x0, PAD(len % 8));
if (n % rate < 8)
s->x0 = XOR(s->x0, PAD(n0 % 8));
else
s->x1 = XOR(s->x1, PAD(len % 8));
s->x1 = XOR(s->x1, PAD(n1 % 8));
}
......@@ -4,6 +4,7 @@
#include <stdint.h>
#include "endian.h"
#include "forceinline.h"
#include "interleave.h"
typedef struct {
......@@ -11,102 +12,92 @@ typedef struct {
uint32_t o;
} word_t;
__forceinline uint32_t ROR32(uint32_t x, int n) {
/* Rotate the 32-bit value x right by n bits. n == 0 is returned unchanged so
   that the complementary left shift is never a shift by 32. */
forceinline uint32_t ROR32(uint32_t x, int n) {
  if (n == 0) {
    return x;
  }
  return (x >> n) | (x << (32 - n));
}
__forceinline word_t ROR64(word_t x, int n) {
/* Rotate a word held as the two 32-bit halves e and o by n.
   For even n each half is rotated in place by n / 2. For odd n the halves
   trade places: the new e is x.o rotated by (n - 1) / 2 and the new o is x.e
   rotated by (n + 1) / 2. */
forceinline word_t ROR(word_t x, int n) {
  word_t rotated;
  if (n % 2) {
    rotated.e = ROR32(x.o, (n - 1) / 2);
    rotated.o = ROR32(x.e, (n + 1) / 2);
  } else {
    rotated.e = ROR32(x.e, n / 2);
    rotated.o = ROR32(x.o, n / 2);
  }
  return rotated;
}
__forceinline word_t WORD_T(uint64_t x) {
return (word_t){.o = x >> 32, .e = x};
}
forceinline word_t WORD_T(uint64_t x) { return (word_t){.o = x >> 32, .e = x}; }
__forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; }
forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; }
__forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(deinterleave32(x)); }
forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(deinterleave32(x)); }
__forceinline uint64_t WORDTOU64(word_t w) { return interleave32(UINT64_T(w)); }
forceinline uint64_t WORDTOU64(word_t w) { return interleave32(UINT64_T(w)); }
__forceinline word_t NOT(word_t a) {
forceinline word_t NOT(word_t a) {
a.e = ~a.e;
a.o = ~a.o;
return a;
}
__forceinline word_t XOR(word_t a, word_t b) {
forceinline word_t XOR(word_t a, word_t b) {
a.e ^= b.e;
a.o ^= b.o;
return a;
}
__forceinline word_t AND(word_t a, word_t b) {
forceinline word_t AND(word_t a, word_t b) {
a.e &= b.e;
a.o &= b.o;
return a;
}
__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) {
forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) {
word_t r;
r.o = lo2hi.o << 16 | hi2lo.o >> 16;
r.e = lo2hi.e << 16 | hi2lo.e >> 16;
r.o = lo2hi.o << 16 | hi2lo.o >> 16;
return r;
}
__forceinline uint8_t NOTZERO(word_t a, word_t b) {
forceinline int NOTZERO(word_t a, word_t b) {
uint32_t result = a.e | a.o | b.e | b.o;
result |= result >> 16;
result |= result >> 8;
return (uint8_t)result;
return ((((int)(result & 0xff) - 1) >> 8) & 1) - 1;
}
__forceinline word_t PAD(int i) {
return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32);
forceinline word_t PAD(int i) {
return WORD_T((uint64_t)(0x8ul << (28 - 4 * i)) << 32);
}
__forceinline word_t CLEAR(word_t w, int n) {
forceinline word_t CLEAR(word_t w, int n) {
/* undefined for n == 0 */
uint32_t mask = 0x0fffffff >> (n * 4 - 4);
return AND(w, WORD_T((uint64_t)mask << 32 | mask));
w.e &= mask;
w.o &= mask;
return w;
}
__forceinline uint64_t MASK(int n) {
/* Return a mask with the low 8 * n bits set.
   n <= 0 yields 0 and n >= 8 yields all ones, so the shift count below is
   always within 8..56 and never reaches the undefined cases of shifting a
   64-bit value by 64 or by a negative amount. */
forceinline uint64_t MASK(int n) {
  if (n <= 0) return 0;
  if (n >= 8) return ~0ull;
  return ~0ull >> (64 - 8 * n);
}
__forceinline word_t LOAD64(const uint8_t* bytes) {
uint64_t x = *(uint64_t*)bytes;
return U64TOWORD(U64BIG(x));
}
__forceinline void STORE64(uint8_t* bytes, word_t w) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes = U64BIG(x);
}
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
/* Load bytes[0..n-1] as a big-endian 64-bit value (bytes[0] is the most
   significant byte, the unused low-order bytes are zero) and convert it to a
   word_t with U64TOWORD.
   The value is assembled byte by byte: exactly n bytes are read, `bytes`
   needs no particular alignment, and the result does not depend on the byte
   order of the host. n is expected to be in 0..8. */
forceinline word_t LOAD(const uint8_t* bytes, int n) {
  uint64_t x = 0;
  for (int i = 0; i < n; ++i) x |= (uint64_t)bytes[i] << (56 - 8 * i);
  return U64TOWORD(x);
}
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
/* Convert w to a 64-bit value with WORDTOU64 and write its n most significant
   bytes to bytes[0..n-1], most significant byte first.
   Only those n bytes are written: nothing beyond bytes[n-1] is read or
   modified, `bytes` needs no particular alignment, and the result does not
   depend on the byte order of the host. n is expected to be in 0..8. */
forceinline void STORE(uint8_t* bytes, word_t w, int n) {
  uint64_t x = WORDTOU64(w);
  for (int i = 0; i < n; ++i) bytes[i] = (uint8_t)(x >> (56 - 8 * i));
}
__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) {
/* Load bytes[0..n-1] as a big-endian 64-bit value (bytes[0] is the most
   significant byte, the unused low-order bytes are zero) and convert it to a
   word_t with U64TOWORD.
   The value is built with shifts rather than by indexing the object
   representation of x, so the result is the same on little- and big-endian
   hosts. n is expected to be in 0..8. */
forceinline word_t LOADBYTES(const uint8_t* bytes, int n) {
  uint64_t x = 0;
  for (int i = 0; i < n; ++i) x |= (uint64_t)bytes[i] << (56 - 8 * i);
  return U64TOWORD(x);
}
__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) {
/* Convert w to a 64-bit value with WORDTOU64 and write its n most significant
   bytes to bytes[0..n-1], most significant byte first.
   The bytes are extracted with shifts rather than by indexing the object
   representation of x, so the result is the same on little- and big-endian
   hosts. n is expected to be in 0..8. */
forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) {
  uint64_t x = WORDTOU64(w);
  for (int i = 0; i < n; ++i) bytes[i] = (uint8_t)(x >> (56 - 8 * i));
}
......
#define CRYPTO_VERSION "1.2.5"
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#define ASCON_RATE 16
#define ASCON_AEAD_RATE 16
......@@ -3,15 +3,14 @@
#include <stdint.h>
#include "config.h"
#include "word.h"
typedef struct {
word_t x0, x1, x2, x3, x4;
} state_t;
void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k);
void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen);
void ascon_aeadinit(state_t* s, const uint8_t* npub, const uint8_t* k);
void ascon_adata(state_t* s, const uint8_t* ad, uint64_t adlen);
void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen);
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen);
void ascon_final(state_t* s, const uint8_t* k);
......
#ifndef CONFIG_H_
#define CONFIG_H_
/* inline the Ascon mode */
/* inline the ascon mode */
#ifndef ASCON_INLINE_MODE
#define ASCON_INLINE_MODE 1
#endif
/* inline the Ascon permutations */
/* inline all permutations */
#ifndef ASCON_INLINE_PERM
#define ASCON_INLINE_PERM 0
#endif
/* single function for all permutations */
#ifndef ASCON_SINGLE_PERM
#define ASCON_SINGLE_PERM 1
#endif
/* unroll the permutation loops */
/* unroll permutation loops */
#ifndef ASCON_UNROLL_LOOPS
#define ASCON_UNROLL_LOOPS 0
#endif
/* make sure __forceinline is supported */
#ifndef __forceinline
#define __forceinline inline __attribute__((always_inline))
#endif
#endif /* CONFIG_H_ */
......@@ -4,7 +4,7 @@
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
/* macros for big endian machines */
#ifndef NDEBUG
#ifdef PRAGMA_ENDIAN
#pragma message("Using macros for big endian machines")
#endif
#define U64BIG(x) (x)
......@@ -15,7 +15,7 @@
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
/* macros for little endian machines */
#ifndef NDEBUG
#ifdef PRAGMA_ENDIAN
#pragma message("Using macros for little endian machines")
#endif
#define U64BIG(x) \
......
......@@ -3,6 +3,8 @@
#include <stdint.h>
#include "forceinline.h"
uint64_t interleave8(uint64_t x);
#endif /* INTERLEAVE_H_ */
#include "permutations.h"
#include "round.h"
#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM
#if !ASCON_UNROLL_LOOPS
const uint64_t constants[12] = {
0x0101010100000000ull, 0x0101010000000001ull, 0x0101000100000100ull,
......@@ -12,34 +10,16 @@ const uint64_t constants[12] = {
#endif
#if ASCON_INLINE_PERM
#elif ASCON_SINGLE_PERM
void P(state_t* s, uint8_t rounds) {
printstate(" permutation input", s);
for (int i = START(rounds); i < 12; ++i) ROUND(s, constants[i]);
}
#if !ASCON_INLINE_PERM && ASCON_UNROLL_LOOPS
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */
void P12(state_t* s) { P12ROUNDS(s); }
void P8(state_t* s) { P8ROUNDS(s); }
void P6(state_t* s) { P6ROUNDS(s); }
void P12(state_t* s) {
printstate(" permutation input", s);
P12ROUNDS(s);
}
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16
void P8(state_t* s) {
printstate(" permutation input", s);
P8ROUNDS(s);
}
#endif
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8
void P6(state_t* s) {
printstate(" permutation input", s);
P6ROUNDS(s);
}
#endif
#if !ASCON_INLINE_PERM && !ASCON_UNROLL_LOOPS
void P(state_t* s, int nr) { PROUNDS(s, nr); }
#endif
......@@ -5,6 +5,7 @@
#include "api.h"
#include "ascon.h"
#include "config.h"
#include "printstate.h"
#include "round.h"
......@@ -14,11 +15,20 @@
#define ASCON_128_RATE 8
#define ASCON_128A_RATE 16
#define ASCON_HASH_RATE 8
#define ASCON_128_PA_ROUNDS 12
#define ASCON_128_PB_ROUNDS 6
#define ASCON_128A_PA_ROUNDS 12
#define ASCON_128A_PB_ROUNDS 8
#define ASCON_HASH_PA_ROUNDS 12
#define ASCON_HASH_PB_ROUNDS 12
#define ASCON_HASHA_PA_ROUNDS 12
#define ASCON_HASHA_PB_ROUNDS 8
#define ASCON_HASH_BYTES 32
#define ASCON_128_IV WORD_T(0x8040000020301000ull)
......@@ -33,132 +43,95 @@
#define ASCON_HASH_IV3 WORD_T(0x2f871f6c6d0082b2ull)
#define ASCON_HASH_IV4 WORD_T(0x7a1ba68850ec407eull)
#define ASCON_HASHA_IV0 WORD_T(0x194c0f180a5d41e4ull)
#define ASCON_HASHA_IV1 WORD_T(0x7faa87825647f3a7ull)
#define ASCON_HASHA_IV2 WORD_T(0x606dbe06db8da430ull)
#define ASCON_HASHA_IV3 WORD_T(0xe0dd6bcf19fbce3bull)
#define ASCON_HASHA_IV4 WORD_T(0x9720dc4446473d8bull)
#define ASCON_XOF_IV0 WORD_T(0x8a46f0d354e771b8ull)
#define ASCON_XOF_IV1 WORD_T(0x04489f4084368cd0ull)
#define ASCON_XOF_IV2 WORD_T(0x6c94f2150dbcf66cull)
#define ASCON_XOF_IV3 WORD_T(0x48965294f143b44eull)
#define ASCON_XOF_IV4 WORD_T(0x0788515fe0e5fb8aull)
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16
#define IV ASCON_128_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 6
#define PB P6
#endif
#if ASCON_RATE == 16
#define IV ASCON_128A_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 8
#define PB P8
#endif
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20
#define IV ASCON_80PQ_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 6
#define PB P6
#endif
#define ASCON_XOFA_IV0 WORD_T(0x4ab43d4f16a80d2cull)
#define ASCON_XOFA_IV1 WORD_T(0xd0ae310bf0f619ceull)
#define ASCON_XOFA_IV2 WORD_T(0xc08cf3c801d89cf3ull)
#define ASCON_XOFA_IV3 WORD_T(0x3859d2094dac0b35ull)
#define ASCON_XOFA_IV4 WORD_T(0xd274992be52b5357ull)
/* index of the first round constant used by an n-round permutation (the
   round loops run from START(n) up to, but not including, 12); the argument
   is parenthesised so that expressions such as START(a + b) expand correctly */
#define START(n) (12 - (n))
#if ASCON_UNROLL_LOOPS
__forceinline void P12ROUNDS(state_t* s) {
ROUND(s, 0x0101010100000000ull);
ROUND(s, 0x0101010000000001ull);
ROUND(s, 0x0101000100000100ull);
ROUND(s, 0x0101000000000101ull);
ROUND(s, 0x0100010100010000ull);
ROUND(s, 0x0100010000010001ull);
ROUND(s, 0x0100000100010100ull);
ROUND(s, 0x0100000000010101ull);
ROUND(s, 0x0001010101000000ull);
ROUND(s, 0x0001010001000001ull);
ROUND(s, 0x0001000101000100ull);
ROUND(s, 0x0001000001000101ull);
#define RC(c) WORD_T(c)
/* Apply twelve rounds to the state s, fully unrolled: one ROUND call per
   round, each with its 64-bit round constant wrapped by RC. */
forceinline void P12ROUNDS(state_t* s) {
ROUND(s, RC(0x0101010100000000ull));
ROUND(s, RC(0x0101010000000001ull));
ROUND(s, RC(0x0101000100000100ull));
ROUND(s, RC(0x0101000000000101ull));
ROUND(s, RC(0x0100010100010000ull));
ROUND(s, RC(0x0100010000010001ull));
ROUND(s, RC(0x0100000100010100ull));
ROUND(s, RC(0x0100000000010101ull));
ROUND(s, RC(0x0001010101000000ull));
ROUND(s, RC(0x0001010001000001ull));
ROUND(s, RC(0x0001000101000100ull));
ROUND(s, RC(0x0001000001000101ull));
}
__forceinline void P8ROUNDS(state_t* s) {
ROUND(s, 0x0100010100010000ull);
ROUND(s, 0x0100010000010001ull);
ROUND(s, 0x0100000100010100ull);
ROUND(s, 0x0100000000010101ull);
ROUND(s, 0x0001010101000000ull);
ROUND(s, 0x0001010001000001ull);
ROUND(s, 0x0001000101000100ull);
ROUND(s, 0x0001000001000101ull);
/* Apply eight rounds to the state s, fully unrolled. The constants are the
   same as, and in the same order as, the last eight rounds of P12ROUNDS. */
forceinline void P8ROUNDS(state_t* s) {
ROUND(s, RC(0x0100010100010000ull));
ROUND(s, RC(0x0100010000010001ull));
ROUND(s, RC(0x0100000100010100ull));
ROUND(s, RC(0x0100000000010101ull));
ROUND(s, RC(0x0001010101000000ull));
ROUND(s, RC(0x0001010001000001ull));
ROUND(s, RC(0x0001000101000100ull));
ROUND(s, RC(0x0001000001000101ull));
}
__forceinline void P6ROUNDS(state_t* s) {
ROUND(s, 0x0100000100010100ull);
ROUND(s, 0x0100000000010101ull);
ROUND(s, 0x0001010101000000ull);
ROUND(s, 0x0001010001000001ull);
ROUND(s, 0x0001000101000100ull);
ROUND(s, 0x0001000001000101ull);
/* Apply six rounds to the state s, fully unrolled. The constants are the
   same as, and in the same order as, the last six rounds of P12ROUNDS. */
forceinline void P6ROUNDS(state_t* s) {
ROUND(s, RC(0x0100000100010100ull));
ROUND(s, RC(0x0100000000010101ull));
ROUND(s, RC(0x0001010101000000ull));
ROUND(s, RC(0x0001010001000001ull));
ROUND(s, RC(0x0001000101000100ull));
ROUND(s, RC(0x0001000001000101ull));
}
#else /* !ASCON_UNROLL_LOOPS */
extern const uint64_t constants[12];
__forceinline void P12ROUNDS(state_t* s) {
for (int i = START(12); i < 12; ++i) ROUND(s, constants[i]);
}
__forceinline void P8ROUNDS(state_t* s) {
for (int i = START(8); i < 12; ++i) ROUND(s, constants[i]);
}
__forceinline void P6ROUNDS(state_t* s) {
for (int i = START(6); i < 12; ++i) ROUND(s, constants[i]);
/* Looped permutation: apply the last nr of the twelve rounds to s, reading
   each round's constant from the `constants` table. */
forceinline void PROUNDS(state_t* s, int nr) {
  int round = START(nr);
  while (round < 12) {
    ROUND(s, RC(constants[round]));
    round++;
  }
}
#endif
#if ASCON_INLINE_PERM
#if ASCON_INLINE_PERM && ASCON_UNROLL_LOOPS
__forceinline void P12(state_t* s) {
printstate(" permutation input", s);
P12ROUNDS(s);
/* Dispatch to the inlined, unrolled permutation with nr rounds. Only 12, 8
   and 6 are handled; any other value leaves the state untouched. */
forceinline void P(state_t* s, int nr) {
  switch (nr) {
    case 12:
      P12ROUNDS(s);
      break;
    case 8:
      P8ROUNDS(s);
      break;
    case 6:
      P6ROUNDS(s);
      break;
    default:
      break;
  }
}
__forceinline void P8(state_t* s) {
printstate(" permutation input", s);
P8ROUNDS(s);
}
#elif !ASCON_INLINE_PERM && ASCON_UNROLL_LOOPS
__forceinline void P6(state_t* s) {
printstate(" permutation input", s);
P6ROUNDS(s);
}
void P12(state_t* s);
void P8(state_t* s);
void P6(state_t* s);
__forceinline void P(state_t* s, int i) {
if (i == 12) P12(s);
if (i == 8) P8(s);
if (i == 6) P6(s);
/* Dispatch to the out-of-line permutation function with nr rounds. Only 12,
   8 and 6 are handled; any other value leaves the state untouched. */
forceinline void P(state_t* s, int nr) {
  switch (nr) {
    case 12:
      P12(s);
      break;
    case 8:
      P8(s);
      break;
    case 6:
      P6(s);
      break;
    default:
      break;
  }
}
#elif ASCON_SINGLE_PERM
#define P12(s) P(s, 12)
#define P8(s) P(s, 8)
#define P6(s) P(s, 6)
#elif ASCON_INLINE_PERM && !ASCON_UNROLL_LOOPS
void P(state_t* s, uint8_t rounds);
forceinline void P(state_t* s, int nr) { PROUNDS(s, nr); }
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */
#else /* !ASCON_INLINE_PERM && !ASCON_UNROLL_LOOPS */
void P12(state_t* s);
void P8(state_t* s);
void P6(state_t* s);
__forceinline void P(state_t* s, int i) {
if (i == 12) P12(s);
if (i == 8) P8(s);
if (i == 6) P6(s);
}
void P(state_t* s, int nr);
#endif
......
#ifndef PRINTSTATE_H_
#define PRINTSTATE_H_
#ifdef NDEBUG
#ifdef ASCON_PRINTSTATE
#define printword(text, w)
#define printstate(text, s)
#include "ascon.h"
#include "word.h"
#else
void printword(const char* text, const word_t x);
void printstate(const char* text, const state_t* s);
#include <inttypes.h>
#include <stdio.h>
#else
#include "ascon.h"
#include "word.h"
#define printword(text, w) \
do { \
} while (0)
__forceinline void printword(const char* text, const word_t x) {
printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x));
}
__forceinline void printstate(const char* text, const state_t* s) {
printf("%s:\n", text);
printword(" x0", s->x0);
printword(" x1", s->x1);
printword(" x2", s->x2);
printword(" x3", s->x3);
printword(" x4", s->x4);
}
#define printstate(text, s) \
do { \
} while (0)
#endif
......
......@@ -4,13 +4,13 @@
#include "ascon.h"
#include "printstate.h"
__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) {
forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) {
*K0 = WORD_T(0);
*K1 = WORD_T(0);
*K2 = WORD_T(0);
}
__forceinline void PINIT(state_t* s) {
forceinline void PINIT(state_t* s) {
s->x0 = WORD_T(0);
s->x1 = WORD_T(0);
s->x2 = WORD_T(0);
......@@ -18,34 +18,34 @@ __forceinline void PINIT(state_t* s) {
s->x4 = WORD_T(0);
}
__forceinline void ROUND(state_t* s, uint64_t C) {
word_t tmp;
forceinline void ROUND(state_t* s, word_t C) {
word_t xtemp;
/* round constant */
s->x2 = XOR(s->x2, WORD_T(C));
s->x2 = XOR(s->x2, C);
/* s-box layer */
s->x0 = XOR(s->x0, s->x4);
s->x4 = XOR(s->x4, s->x3);
s->x2 = XOR(s->x2, s->x1);
tmp = AND(s->x0, NOT(s->x4));
xtemp = AND(s->x0, NOT(s->x4));
s->x0 = XOR(s->x0, AND(s->x2, NOT(s->x1)));
s->x2 = XOR(s->x2, AND(s->x4, NOT(s->x3)));
s->x4 = XOR(s->x4, AND(s->x1, NOT(s->x0)));
s->x1 = XOR(s->x1, AND(s->x3, NOT(s->x2)));
s->x3 = XOR(s->x3, tmp);
s->x3 = XOR(s->x3, xtemp);
s->x1 = XOR(s->x1, s->x0);
s->x3 = XOR(s->x3, s->x2);
s->x0 = XOR(s->x0, s->x4);
/* linear layer */
tmp = XOR(s->x0, ROR64(s->x0, 28 - 19));
s->x0 = XOR(s->x0, ROR64(tmp, 19));
tmp = XOR(s->x1, ROR64(s->x1, 61 - 39));
s->x1 = XOR(s->x1, ROR64(tmp, 39));
tmp = XOR(s->x2, ROR64(s->x2, 6 - 1));
s->x2 = XOR(s->x2, ROR64(tmp, 1));
tmp = XOR(s->x3, ROR64(s->x3, 17 - 10));
s->x3 = XOR(s->x3, ROR64(tmp, 10));
tmp = XOR(s->x4, ROR64(s->x4, 41 - 7));
s->x4 = XOR(s->x4, ROR64(tmp, 7));
xtemp = XOR(s->x0, ROR(s->x0, 28 - 19));
s->x0 = XOR(s->x0, ROR(xtemp, 19));
xtemp = XOR(s->x1, ROR(s->x1, 61 - 39));
s->x1 = XOR(s->x1, ROR(xtemp, 39));
xtemp = XOR(s->x2, ROR(s->x2, 6 - 1));
s->x2 = XOR(s->x2, ROR(xtemp, 1));
xtemp = XOR(s->x3, ROR(s->x3, 17 - 10));
s->x3 = XOR(s->x3, ROR(xtemp, 10));
xtemp = XOR(s->x4, ROR(s->x4, 41 - 7));
s->x4 = XOR(s->x4, ROR(xtemp, 7));
s->x2 = NOT(s->x2);
printstate(" round output", s);
}
......
......@@ -4,6 +4,7 @@
#include <stdint.h>
#include "endian.h"
#include "forceinline.h"
#include "interleave.h"
typedef union {
......@@ -11,9 +12,9 @@ typedef union {
uint8_t b[8];
} word_t;
__forceinline uint8_t ROR8(uint8_t a, int n) { return a >> n | a << (8 - n); }
forceinline uint8_t ROR8(uint8_t a, int n) { return a >> n | a << (8 - n); }
__forceinline word_t ROR64(word_t a, int n) {
forceinline word_t ROR(word_t a, int n) {
word_t b;
b.b[0] = ROR8(a.b[(n + 0) & 0x7], (n + 0) >> 3);
b.b[1] = ROR8(a.b[(n + 1) & 0x7], (n + 1) >> 3);
......@@ -26,54 +27,54 @@ __forceinline word_t ROR64(word_t a, int n) {
return b;
}
__forceinline word_t WORD_T(uint64_t x) {
forceinline word_t WORD_T(uint64_t x) {
word_t w;
w.w = x;
return w;
}
__forceinline uint64_t UINT64_T(word_t w) {
forceinline uint64_t UINT64_T(word_t w) {
uint64_t x;
x = w.w;
return x;
}
__forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(interleave8(x)); }
forceinline word_t U64TOWORD(uint64_t x) { return WORD_T(interleave8(x)); }
__forceinline uint64_t WORDTOU64(word_t w) { return interleave8(UINT64_T(w)); }
forceinline uint64_t WORDTOU64(word_t w) { return interleave8(UINT64_T(w)); }
__forceinline word_t NOT(word_t a) {
forceinline word_t NOT(word_t a) {
a.w = ~a.w;
return a;
}
__forceinline word_t XOR(word_t a, word_t b) {
forceinline word_t XOR(word_t a, word_t b) {
a.w ^= b.w;
return a;
}
__forceinline word_t AND(word_t a, word_t b) {
forceinline word_t AND(word_t a, word_t b) {
a.w &= b.w;
return a;
}
__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) {
forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) {
word_t w;
w.w = lo2hi.w << 32 | hi2lo.w >> 32;
return w;
}
__forceinline uint8_t NOTZERO(word_t a, word_t b) {
forceinline int NOTZERO(word_t a, word_t b) {
uint64_t result = a.w | b.w;
result |= result >> 32;
result |= result >> 16;
result |= result >> 8;
return (uint8_t)result;
return ((((int)(result & 0xff) - 1) >> 8) & 1) - 1;
}
__forceinline word_t PAD(int i) { return (word_t){.b[7] = 0x80 >> i}; }
forceinline word_t PAD(int i) { return (word_t){.b[7] = 0x80 >> i}; }
__forceinline word_t CLEAR(word_t w, int n) {
forceinline word_t CLEAR(word_t w, int n) {
/* undefined for n == 0 */
uint8_t m = 0xff >> n;
word_t mask = {
......@@ -89,39 +90,29 @@ __forceinline word_t CLEAR(word_t w, int n) {
return AND(w, mask);
}
__forceinline uint64_t MASK(int n) {
forceinline uint64_t MASK(int n) {
/* undefined for n == 0 */
return ~0ull >> (64 - 8 * n);
}
__forceinline word_t LOAD64(const uint8_t* bytes) {
uint64_t x = *(uint64_t*)bytes;
return U64TOWORD(U64BIG(x));
}
__forceinline void STORE64(uint8_t* bytes, word_t w) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes = U64BIG(x);
}
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = *(uint64_t*)bytes & MASK(n);
return U64TOWORD(U64BIG(x));
}
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes &= ~MASK(n);
*(uint64_t*)bytes |= U64BIG(x);
}
__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) {
forceinline word_t LOADBYTES(const uint8_t* bytes, int n) {
uint64_t x = 0;
for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i];
return U64TOWORD(x);
}
__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) {
forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i];
}
......
......@@ -121,15 +121,19 @@
forceinline void ascon_loadkey(word_t* K0, word_t* K1, word_t* K2,
const uint8_t* k) {
KINIT(K0, K1, K2);
if (CRYPTO_KEYBYTES == 16) {
*K1 = XOR(*K1, LOAD(k, 8));
*K2 = XOR(*K2, LOAD(k + 8, 8));
}
if (CRYPTO_KEYBYTES == 20) {
*K0 = XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4)));
k += 4;
*K0 = XOR(*K0, KEYROT(WORD_T(0), LOADBYTES(k, 4)));
*K1 = XOR(*K1, LOADBYTES(k + 4, 8));
*K2 = XOR(*K2, LOADBYTES(k + 12, 8));
}
*K1 = XOR(*K1, LOAD(k, 8));
*K2 = XOR(*K2, LOAD(k + 8, 8));
}
forceinline void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) {
forceinline void ascon_aeadinit(state_t* s, const uint8_t* npub,
const uint8_t* k) {
/* load nonce */
word_t N0 = LOAD(npub, 8);
word_t N1 = LOAD(npub + 8, 8);
......@@ -138,9 +142,9 @@ forceinline void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) {
ascon_loadkey(&K0, &K1, &K2, k);
/* initialize */
PINIT(s);
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8)
if (CRYPTO_KEYBYTES == 16 && ASCON_AEAD_RATE == 8)
s->x0 = XOR(s->x0, ASCON_128_IV);
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16)
if (CRYPTO_KEYBYTES == 16 && ASCON_AEAD_RATE == 16)
s->x0 = XOR(s->x0, ASCON_128A_IV);
if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, ASCON_80PQ_IV);
if (CRYPTO_KEYBYTES == 20) s->x0 = XOR(s->x0, K0);
......@@ -156,13 +160,13 @@ forceinline void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) {
}
forceinline void ascon_adata(state_t* s, const uint8_t* ad, uint64_t adlen) {
const int nr = (ASCON_RATE == 8) ? 6 : 8;
const int nr = (ASCON_AEAD_RATE == 8) ? 6 : 8;
if (adlen) {
/* full associated data blocks */
AD();
/* final associated data block */
word_t* px = &s->x0;
if (ASCON_RATE == 16 && adlen >= 8) {
if (ASCON_AEAD_RATE == 16 && adlen >= 8) {
s->x0 = XOR(s->x0, LOAD(ad, 8));
px = &s->x1;
ad += 8;
......@@ -179,12 +183,12 @@ forceinline void ascon_adata(state_t* s, const uint8_t* ad, uint64_t adlen) {
forceinline void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m,
uint64_t mlen) {
const int nr = (ASCON_RATE == 8) ? 6 : 8;
const int nr = (ASCON_AEAD_RATE == 8) ? 6 : 8;
/* full plaintext blocks */
PT();
/* final plaintext block */
word_t* px = &s->x0;
if (ASCON_RATE == 16 && mlen >= 8) {
if (ASCON_AEAD_RATE == 16 && mlen >= 8) {
s->x0 = XOR(s->x0, LOAD(m, 8));
STORE(c, s->x0, 8);
px = &s->x1;
......@@ -202,12 +206,12 @@ forceinline void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m,
forceinline void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c,
uint64_t clen) {
const int nr = (ASCON_RATE == 8) ? 6 : 8;
const int nr = (ASCON_AEAD_RATE == 8) ? 6 : 8;
/* full ciphertext blocks */
CT();
/* final ciphertext block */
word_t* px = &s->x0;
if (ASCON_RATE == 16 && clen >= 8) {
if (ASCON_AEAD_RATE == 16 && clen >= 8) {
word_t cx = LOAD(c, 8);
s->x0 = XOR(s->x0, cx);
STORE(m, s->x0, 8);
......@@ -233,11 +237,11 @@ forceinline void ascon_final(state_t* s, const uint8_t* k) {
word_t K0, K1, K2;
ascon_loadkey(&K0, &K1, &K2, k);
/* finalize */
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) {
if (CRYPTO_KEYBYTES == 16 && ASCON_AEAD_RATE == 8) {
s->x1 = XOR(s->x1, K1);
s->x2 = XOR(s->x2, K2);
}
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) {
if (CRYPTO_KEYBYTES == 16 && ASCON_AEAD_RATE == 16) {
s->x2 = XOR(s->x2, K1);
s->x3 = XOR(s->x3, K2);
}
......@@ -261,7 +265,7 @@ int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen,
(void)nsec;
*clen = mlen + CRYPTO_ABYTES;
/* perform ascon computation */
ascon_init(&s, npub, k);
ascon_aeadinit(&s, npub, k);
ascon_adata(&s, ad, adlen);
ascon_encrypt(&s, c, m, mlen);
ascon_final(&s, k);
......@@ -281,7 +285,7 @@ int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen,
if (clen < CRYPTO_ABYTES) return -1;
*mlen = clen = clen - CRYPTO_ABYTES;
/* perform ascon computation */
ascon_init(&s, npub, k);
ascon_aeadinit(&s, npub, k);
ascon_adata(&s, ad, adlen);
ascon_decrypt(&s, m, c, clen);
ascon_final(&s, k);
......
#define CRYPTO_VERSION "1.2.5"
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#define ASCON_RATE 16
#define ASCON_AEAD_RATE 16
#ifndef CONFIG_H_
#define CONFIG_H_
/* inline the Ascon mode */
/* inline the ascon mode */
#ifndef ASCON_INLINE_MODE
#define ASCON_INLINE_MODE 1
#define ASCON_INLINE_MODE 0
#endif
/* inline the Ascon permutations */
/* inline all permutations */
#ifndef ASCON_INLINE_PERM
#define ASCON_INLINE_PERM 1
#endif
/* single function for all permutations */
#ifndef ASCON_SINGLE_PERM
#define ASCON_SINGLE_PERM 0
#endif
/* unroll the permutation loops */
/* unroll permutation loops */
#ifndef ASCON_UNROLL_LOOPS
#define ASCON_UNROLL_LOOPS 1
#endif
/* make sure __forceinline is supported */
#ifndef __forceinline
#define __forceinline inline __attribute__((always_inline))
#endif
#endif /* CONFIG_H_ */
......@@ -4,7 +4,7 @@
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
/* macros for big endian machines */
#ifndef NDEBUG
#ifdef PRAGMA_ENDIAN
#pragma message("Using macros for big endian machines")
#endif
#define U64BIG(x) (x)
......@@ -15,7 +15,7 @@
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
/* macros for little endian machines */
#ifndef NDEBUG
#ifdef PRAGMA_ENDIAN
#pragma message("Using macros for little endian machines")
#endif
#define U64BIG(x) \
......
#ifndef PERMUTATIONS_H_
#define PERMUTATIONS_H_
#include <stdint.h>
#include "api.h"
#include "ascon.h"
#include "config.h"
#include "printstate.h"
#include "round.h"
static const uint64_t C[12] = {
#define ASCON_128_KEYBYTES 16
#define ASCON_128A_KEYBYTES 16
#define ASCON_80PQ_KEYBYTES 20
#define ASCON_128_RATE 8
#define ASCON_128A_RATE 16
#define ASCON_128_PA_ROUNDS 12
#define ASCON_128_PB_ROUNDS 6
#define ASCON_128A_PB_ROUNDS 8
#define ASCON_HASH_BYTES 32
#define ASCON_128_IV WORD_T(0x80400c0600000000ull)
#define ASCON_128A_IV WORD_T(0x80800c0800000000ull)
#define ASCON_80PQ_IV WORD_T(0xa0400c0600000000ull)
#define ASCON_HASH_IV WORD_T(0x00400c0000000100ull)
#define ASCON_XOF_IV WORD_T(0x00400c0000000000ull)
#define ASCON_HASH_IV0 WORD_T(0xee9398aadb67f03dull)
#define ASCON_HASH_IV1 WORD_T(0x8bb21831c60f1002ull)
#define ASCON_HASH_IV2 WORD_T(0xb48a92db98d5da62ull)
#define ASCON_HASH_IV3 WORD_T(0x43189921b8f8e3e8ull)
#define ASCON_HASH_IV4 WORD_T(0x348fa5c9d525e140ull)
#define ASCON_XOF_IV0 WORD_T(0xb57e273b814cd416ull)
#define ASCON_XOF_IV1 WORD_T(0x2b51042562ae2420ull)
#define ASCON_XOF_IV2 WORD_T(0x66a3a7768ddf2218ull)
#define ASCON_XOF_IV3 WORD_T(0x5aad0a7a8153650cull)
#define ASCON_XOF_IV4 WORD_T(0x4f3e0e32539493b6ull)
#define START(n) ((3 + (n)) << 4 | (12 - (n)))
#define RC(c) WORD_T(c)
const uint64_t C[12] = {
0xffffffffffffff0full, 0xffffffffffffff1eull, 0xffffffffffffff2dull,
0xffffffffffffff3cull, 0xffffffffffffff4bull, 0xffffffffffffff5aull,
0xffffffffffffff69ull, 0xffffffffffffff78ull, 0xffffffffffffff87ull,
0xffffffffffffff96ull, 0xffffffffffffffa5ull, 0xffffffffffffffb4ull,
};
#define P12() \
#define P12ROUNDS(s) \
ROUND(0) \
ROUND(8) \
ROUND(16) \
ROUND(24) \
ROUND(32) \
ROUND(40) \
ROUND(48) \
ROUND(56) \
ROUND(64) \
ROUND(72) \
ROUND(80) \
ROUND(88)
/* Expands to eight consecutive ROUND() assembly fragments. Each argument is a
 * byte offset into the round-constant table C (uint64_t entries, 8 bytes
 * apart), so 32..88 selects C[4]..C[11], i.e. the last eight of the twelve
 * constants. The macro parameter s is not referenced by the expansion. */
#define P8ROUNDS(s) \
ROUND(32) \
ROUND(40) \
ROUND(48) \
ROUND(56) \
ROUND(64) \
ROUND(72) \
ROUND(80) \
ROUND(88)
/* Expands to six consecutive ROUND() assembly fragments. Each argument is a
 * byte offset into the round-constant table C (uint64_t entries, 8 bytes
 * apart), so 48..88 selects C[6]..C[11], i.e. the last six of the twelve
 * constants. The macro parameter s is not referenced by the expansion. */
#define P6ROUNDS(s) \
ROUND(48) \
ROUND(56) \
ROUND(64) \
ROUND(72) \
ROUND(80) \
ROUND(88)
forceinline void P12(state_t* s) {
__asm__ __volatile__ ( \
".arm \n\t" \
".fpu neon \n\t" \
"vldm %[s], {d0-d4} \n\t" \
"vmvn d2, d2 \n\t" \
ROUND(0) \
ROUND(8) \
ROUND(16) \
ROUND(24) \
ROUND(32) \
ROUND(40) \
ROUND(48) \
ROUND(56) \
ROUND(64) \
ROUND(72) \
ROUND(80) \
ROUND(88) \
P12ROUNDS(s) \
"vmvn d2, d2 \n\t" \
"vstm %[s], {d0-d4} \n\t" \
:: [s] "r" (&s), [C] "r" (C) \
:: [s] "r" (s), [C] "r" (C) \
: "d0", "d1", "d2", "d3", "d4", \
"d10", "d11", "d12", "d13", "d14", \
"d20", "d21", "d22", "d23", "d24", \
"d31", "memory")
"d31", "memory");
}
#define P8() \
forceinline void P8(state_t* s) {
__asm__ __volatile__ ( \
".arm \n\t" \
".fpu neon \n\t" \
"vldm %[s], {d0-d4} \n\t" \
"vmvn d2, d2 \n\t" \
ROUND(32) \
ROUND(40) \
ROUND(48) \
ROUND(56) \
ROUND(64) \
ROUND(72) \
ROUND(80) \
ROUND(88) \
P8ROUNDS(s) \
"vmvn d2, d2 \n\t" \
"vstm %[s], {d0-d4} \n\t" \
:: [s] "r" (&s), [C] "r" (C) \
:: [s] "r" (s), [C] "r" (C) \
: "d0", "d1", "d2", "d3", "d4", \
"d10", "d11", "d12", "d13", "d14", \
"d20", "d21", "d22", "d23", "d24", \
"d31", "memory")
#define AD() \
do { \
uint32_t adlen_hi = (uint32_t)(adlen >> 32); \
uint32_t adlen_lo = (uint32_t)adlen; \
__asm__ __volatile__ ( \
".arm \n\t" \
".fpu neon \n\t" \
"cmp %[adlen_hi], #0 \n\t" \
"cmpeq %[adlen_lo], #15 \n\t" \
"bls .LAD1 \n\t" \
"vldm %[s], {d0-d4} \n\t" \
".LAD0: \n\t" \
"vldm %[ad]!, {d16,d17} \n\t" \
"vrev64.8 q8, q8 \n\t" \
"veor q0, q0, q8 \n\t" \
"vmvn d2, d2 \n\t" \
ROUND(32) \
ROUND(40) \
ROUND(48) \
ROUND(56) \
ROUND(64) \
ROUND(72) \
ROUND(80) \
ROUND(88) \
"vmvn d2, d2 \n\t" \
"subs %[adlen_lo], %[adlen_lo], #16 \n\t" \
"sbc %[adlen_hi], %[adlen_hi], #0 \n\t" \
"cmp %[adlen_hi], #0 \n\t" \
"cmpeq %[adlen_lo], #15 \n\t" \
"bhi .LAD0 \n\t" \
"vstm %[s], {d0-d4} \n\t" \
".LAD1: \n\t" \
: [adlen_hi] "+r" (adlen_hi), [adlen_lo] "+r" (adlen_lo), \
[ad] "+r" (ad) \
: [s] "r" (&s), [C] "r" (C) \
: "d0", "d1", "d2", "d3", "d4", \
"d10", "d11", "d12", "d13", "d14", "d16", "d17", \
"d20", "d21", "d22", "d23", "d24", \
"d31", "memory"); \
adlen = (uint64_t)adlen_hi << 32 | adlen_lo; \
} while (0)
#define PT() \
do { \
uint32_t mlen_hi = (uint32_t)(mlen >> 32); \
uint32_t mlen_lo = (uint32_t)mlen; \
__asm__ __volatile__ ( \
".arm \n\t" \
".fpu neon \n\t" \
"cmp %[mlen_hi], #0 \n\t" \
"cmpeq %[mlen_lo], #15 \n\t" \
"bls .LPT1 \n\t" \
"vldm %[s], {d0-d4} \n\t" \
".LPT0: \n\t" \
"vldm %[m]!, {d16,d17} \n\t" \
"vrev64.8 q8, q8 \n\t" \
"veor q0, q0, q8 \n\t" \
"vrev64.8 q13, q0 \n\t" \
"vstm %[c]!, {d26,d27} \n\t" \
"vmvn d2, d2 \n\t" \
ROUND(32) \
ROUND(40) \
ROUND(48) \
ROUND(56) \
ROUND(64) \
ROUND(72) \
ROUND(80) \
ROUND(88) \
"vmvn d2, d2 \n\t" \
"subs %[mlen_lo], %[mlen_lo], #16 \n\t" \
"sbc %[mlen_hi], %[mlen_hi], #0 \n\t" \
"cmp %[mlen_hi], #0 \n\t" \
"cmpeq %[mlen_lo], #15 \n\t" \
"bhi .LPT0 \n\t" \
"vstm %[s], {d0-d4} \n\t" \
".LPT1: \n\t" \
: [mlen_hi] "+r" (mlen_hi), [mlen_lo] "+r" (mlen_lo), \
[m] "+r" (m), [c] "+r" (c) \
: [s] "r" (&s), [C] "r" (C) \
: "d0", "d1", "d2", "d3", "d4", \
"d10", "d11", "d12", "d13", "d14", "d16", "d17", \
"d20", "d21", "d22", "d23", "d24", "d26", "d27", \
"d31", "memory"); \
mlen = (uint64_t)mlen_hi << 32 | mlen_lo; \
} while (0)
#define CT() \
do { \
uint32_t clen_hi = (uint32_t)(clen >> 32); \
uint32_t clen_lo = (uint32_t)clen; \
__asm__ __volatile__ ( \
".arm \n\t" \
".fpu neon \n\t" \
"cmp %[clen_hi], #0 \n\t" \
"cmpeq %[clen_lo], #15 \n\t" \
"bls .LCT1 \n\t" \
"vldm %[s], {d0-d4} \n\t" \
".LCT0: \n\t" \
"vldm %[c]!, {d26,d27} \n\t" \
"vrev64.8 q8, q0 \n\t" \
"veor q8, q8, q13 \n\t" \
"vrev64.8 q0, q13 \n\t" \
"vstm %[m]!, {d16,d17} \n\t" \
"vmvn d2, d2 \n\t" \
ROUND(32) \
ROUND(40) \
ROUND(48) \
ROUND(56) \
ROUND(64) \
ROUND(72) \
ROUND(80) \
ROUND(88) \
"vmvn d2, d2 \n\t" \
"subs %[clen_lo], %[clen_lo], #16 \n\t" \
"sbc %[clen_hi], %[clen_hi], #0 \n\t" \
"cmp %[clen_hi], #0 \n\t" \
"cmpeq %[clen_lo], #15 \n\t" \
"bhi .LCT0 \n\t" \
"vstm %[s], {d0-d4} \n\t" \
".LCT1: \n\t" \
: [clen_hi] "+r" (clen_hi), [clen_lo] "+r" (clen_lo), \
[m] "+r" (m), [c] "+r" (c) \
: [s] "r" (&s), [C] "r" (C) \
: "d0", "d1", "d2", "d3", "d4", \
"d10", "d11", "d12", "d13", "d14", "d16", "d17", \
"d20", "d21", "d22", "d23", "d24", "d26", "d27", \
"d31", "memory"); \
clen = (uint64_t)clen_hi << 32 | clen_lo; \
} while (0)
"d31", "memory");
}
/* Applies the 6-round permutation to the state at s using NEON inline
 * assembly: loads d0-d4 from the address in s, bitwise-inverts d2 (vmvn),
 * runs the P6ROUNDS round sequence, inverts d2 again and stores d0-d4 back
 * to s. The round-constant table C is passed in as the [C] operand.
 * The trailing backslashes are plain line splices; this is a function body,
 * not a macro definition. */
forceinline void P6(state_t* s) {
__asm__ __volatile__ ( \
".arm \n\t" \
".fpu neon \n\t" \
"vldm %[s], {d0-d4} \n\t" \
"vmvn d2, d2 \n\t" \
P6ROUNDS(s) \
"vmvn d2, d2 \n\t" \
"vstm %[s], {d0-d4} \n\t" \
:: [s] "r" (s), [C] "r" (C) \
: "d0", "d1", "d2", "d3", "d4", \
"d10", "d11", "d12", "d13", "d14", \
"d20", "d21", "d22", "d23", "d24", \
"d31", "memory");
}
/* Dispatches to the permutation with the requested number of rounds.
 * Only 12, 8 and 6 are recognised; any other nr leaves the state untouched. */
forceinline void P(state_t* s, int nr) {
  switch (nr) {
    case 12:
      P12(s);
      break;
    case 8:
      P8(s);
      break;
    case 6:
      P6(s);
      break;
    default:
      break;
  }
}
#endif /* PERMUTATIONS_H_ */
#ifndef ROUND_H_
#define ROUND_H_
#include "ascon.h"
#include "printstate.h"
/* Resets the three key words K0, K1 and K2 to the zero word. */
forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) {
  const word_t zero = WORD_T(0);
  *K0 = zero;
  *K1 = zero;
  *K2 = zero;
}
/* Resets the five state words x0..x4 of s to the zero word. */
forceinline void PINIT(state_t* s) {
  const word_t zero = WORD_T(0);
  s->x0 = zero;
  s->x1 = zero;
  s->x2 = zero;
  s->x3 = zero;
  s->x4 = zero;
}
/* clang-format off */
#define ROUND(OFFSET) \
"vldr d31, [%[C], #" #OFFSET "] \n\t" \
......
......@@ -3,28 +3,73 @@
#include <stdint.h>
#define WORDTOU64
#define U64TOWORD
#include "endian.h"
#include "forceinline.h"
typedef uint64_t word_t;
#define WORD_T
#define UINT64_T
#define U64TOWORD
#define WORDTOU64
/* get byte from Ascon 64-bit word */
#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i))))
/* set byte in Ascon 64-bit word */
#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i)))
/* set padding byte in Ascon 64-bit word */
#define PAD(i) SETBYTE(0x80, i)
/* Rotates the 64-bit word x right by n bit positions.
 * Both shift counts are reduced modulo 64, so n == 0 (or any multiple of 64)
 * no longer shifts by the full word width, which is undefined behaviour in C.
 * For 0 < n < 64 the result is identical to the plain two-shift form. */
forceinline word_t ROR(word_t x, int n) {
  return x >> (n & 63) | x << ((64 - n) & 63);
}
/* Returns the bitwise complement of a. */
forceinline word_t NOT(word_t a) {
  const word_t inverted = ~a;
  return inverted;
}
/* Returns the bitwise exclusive-or of a and b. */
forceinline word_t XOR(word_t a, word_t b) {
  a ^= b;
  return a;
}
/* Returns the bitwise and of a and b. */
forceinline word_t AND(word_t a, word_t b) {
  a &= b;
  return a;
}
/* Builds a word whose upper 32 bits are the lower 32 bits of lo2hi and whose
 * lower 32 bits are the upper 32 bits of hi2lo. */
forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) {
  const word_t upper = lo2hi << 32;
  const word_t lower = hi2lo >> 32;
  return upper | lower;
}
/* Branch-free test of whether a or b has any bit set.
 * Returns 0 when both words are zero and -1 otherwise. */
forceinline int NOTZERO(word_t a, word_t b) {
  /* fold every bit of (a | b) down into the lowest byte */
  uint64_t folded = a | b;
  folded |= folded >> 32;
  folded |= folded >> 16;
  folded |= folded >> 8;
  const int low_byte = (int)(folded & 0xff);
  /* low_byte - 1 is negative only when low_byte == 0; bit 8 captures that */
  const int is_zero = ((low_byte - 1) >> 8) & 1;
  return is_zero - 1;
}
/* Returns a word containing the single padding byte 0x80 at byte index i,
 * counted from the most significant byte (i == 0 sets the top byte). */
forceinline word_t PAD(int i) {
  const int shift = 56 - 8 * i;
  return 0x80ull << shift;
}
/* Zeroes the n most significant bytes of w and returns the result. */
forceinline word_t CLEAR(word_t w, int n) {
  /* undefined for n == 0 (the shift count would be negative) */
  const int shift = n * 8 - 8;
  const uint64_t keep = 0x00ffffffffffffffull >> shift;
  return w & keep;
}
/* Returns a mask with the n least significant bytes set to 0xff. */
forceinline uint64_t MASK(int n) {
  /* undefined for n == 0 (the shift count would be 64) */
  const int drop = 64 - 8 * n;
  return ~0ull >> drop;
}
/* Loads the first n bytes at `bytes` (0 <= n <= 8) into a word, placing
 * bytes[0] in the most significant byte; the remaining low bytes are zero.
 *
 * The buffer is read byte by byte instead of through a uint64_t* cast. That
 * removes the alignment and effective-type violation of the cast and, for
 * n < 8, no longer reads past the n bytes the caller actually owns.
 * NOTE(review): this matches the previous result on little-endian targets,
 * assuming U64BIG byte-swaps there -- confirm against endian.h. */
forceinline word_t LOAD(const uint8_t* bytes, int n) {
  uint64_t x = 0;
  for (int i = 0; i < n; ++i) x |= (uint64_t)bytes[i] << (56 - 8 * i);
  return x;
}
/* Stores the n most significant bytes of w (0 <= n <= 8) to bytes[0..n-1],
 * most significant byte first.
 *
 * Only the n requested bytes are written. The previous implementation did an
 * 8-byte read-modify-write through a uint64_t* cast, which required 8
 * accessible (and suitably aligned) bytes and OR-ed the unused bytes of w
 * into bytes[n..7]. Bytes beyond n are now left untouched.
 * NOTE(review): for bytes[0..n-1] this matches the previous result on
 * little-endian targets, assuming U64BIG byte-swaps there -- confirm. */
forceinline void STORE(uint8_t* bytes, word_t w, int n) {
  for (int i = 0; i < n; ++i) bytes[i] = (uint8_t)(w >> (56 - 8 * i));
}
static inline uint64_t LOADBYTES(const uint8_t* bytes, int n) {
forceinline word_t LOADBYTES(const uint8_t* bytes, int n) {
uint64_t x = 0;
for (int i = 0; i < n; ++i) x |= SETBYTE(bytes[i], i);
for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i];
return x;
}
static inline void STOREBYTES(uint8_t* bytes, uint64_t x, int n) {
for (int i = 0; i < n; ++i) bytes[i] = GETBYTE(x, i);
forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) {
for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&w)[7 - i];
}
static inline uint64_t CLEARBYTES(uint64_t x, int n) {
......
#define CRYPTO_VERSION "1.2.5"
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#define ASCON_RATE 16
#define ASCON_AEAD_RATE 16
......@@ -3,15 +3,14 @@
#include <stdint.h>
#include "config.h"
#include "word.h"
typedef struct {
word_t x0, x1, x2, x3, x4;
} state_t;
void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k);
void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen);
void ascon_aeadinit(state_t* s, const uint8_t* npub, const uint8_t* k);
void ascon_adata(state_t* s, const uint8_t* ad, uint64_t adlen);
void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen);
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen);
void ascon_final(state_t* s, const uint8_t* k);
......
#ifndef CONFIG_H_
#define CONFIG_H_
/* inline the Ascon mode */
/* inline the ascon mode */
#ifndef ASCON_INLINE_MODE
#define ASCON_INLINE_MODE 1
#endif
/* inline the Ascon permutations */
/* inline all permutations */
#ifndef ASCON_INLINE_PERM
#define ASCON_INLINE_PERM 1
#endif
/* single function for all permutations */
#ifndef ASCON_SINGLE_PERM
#define ASCON_SINGLE_PERM 0
#endif
/* unroll the permutation loops */
/* unroll permutation loops */
#ifndef ASCON_UNROLL_LOOPS
#define ASCON_UNROLL_LOOPS 1
#endif
/* make sure __forceinline is supported */
#ifndef __forceinline
#define __forceinline inline __attribute__((always_inline))
#endif
#endif /* CONFIG_H_ */
......@@ -4,7 +4,7 @@
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
/* macros for big endian machines */
#ifndef NDEBUG
#ifdef PRAGMA_ENDIAN
#pragma message("Using macros for big endian machines")
#endif
#define U64BIG(x) (x)
......@@ -15,7 +15,7 @@
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
/* macros for little endian machines */
#ifndef NDEBUG
#ifdef PRAGMA_ENDIAN
#pragma message("Using macros for little endian machines")
#endif
#define U64BIG(x) \
......
#include "permutations.h"
#include "round.h"
#if !ASCON_INLINE_PERM && ASCON_UNROLL_LOOPS
#if ASCON_INLINE_PERM
void P12(state_t* s) { P12ROUNDS(s); }
void P8(state_t* s) { P8ROUNDS(s); }
void P6(state_t* s) { P6ROUNDS(s); }
#elif ASCON_SINGLE_PERM
/* Runs `rounds` permutation rounds on s. The round constant starts at
 * START(rounds) and drops by 0x0f per round; the loop stops once it is no
 * longer above 0x4a. */
void P(state_t* s, uint8_t rounds) {
  printstate(" permutation input", s);
  int rc = START(rounds);
  while (rc > 0x4a) {
    ROUND(s, rc);
    rc -= 0x0f;
  }
}
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */
void P12(state_t* s) {
printstate(" permutation input", s);
P12ROUNDS(s);
}
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16
void P8(state_t* s) {
printstate(" permutation input", s);
P8ROUNDS(s);
}
#endif
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8
void P6(state_t* s) {
printstate(" permutation input", s);
P6ROUNDS(s);
}
#endif
#if !ASCON_INLINE_PERM && !ASCON_UNROLL_LOOPS
void P(state_t* s, int nr) { PROUNDS(s, nr); }
#endif
......@@ -5,6 +5,7 @@
#include "api.h"
#include "ascon.h"
#include "config.h"
#include "printstate.h"
#include "round.h"
......@@ -14,18 +15,29 @@
#define ASCON_128_RATE 8
#define ASCON_128A_RATE 16
#define ASCON_HASH_RATE 8
#define ASCON_128_PA_ROUNDS 12
#define ASCON_128_PB_ROUNDS 6
#define ASCON_128A_PA_ROUNDS 12
#define ASCON_128A_PB_ROUNDS 8
#define ASCON_HASH_PA_ROUNDS 12
#define ASCON_HASH_PB_ROUNDS 12
#define ASCON_HASHA_PA_ROUNDS 12
#define ASCON_HASHA_PB_ROUNDS 8
#define ASCON_HASH_BYTES 32
#define ASCON_128_IV WORD_T(0x80400c0600000000)
#define ASCON_128A_IV WORD_T(0x80800c0800000000)
#define ASCON_80PQ_IV WORD_T(0xa0400c0600000000)
#define ASCON_HASH_IV WORD_T(0x00400c0000000100)
#define ASCON_XOF_IV WORD_T(0x00400c0000000000)
#define ASCON_128_IV WORD_T(0x80400c0600000000ull)
#define ASCON_128A_IV WORD_T(0x80800c0800000000ull)
#define ASCON_80PQ_IV WORD_T(0xa0400c0600000000ull)
#define ASCON_HASH_IV WORD_T(0x00400c0000000100ull)
#define ASCON_HASHA_IV WORD_T(0x00400c0400000100ull)
#define ASCON_XOF_IV WORD_T(0x00400c0000000000ull)
#define ASCON_XOFA_IV WORD_T(0x00400c0400000000ull)
#define ASCON_HASH_IV0 WORD_T(0xee9398aadb67f03dull)
#define ASCON_HASH_IV1 WORD_T(0x8bb21831c60f1002ull)
......@@ -33,130 +45,93 @@
#define ASCON_HASH_IV3 WORD_T(0x43189921b8f8e3e8ull)
#define ASCON_HASH_IV4 WORD_T(0x348fa5c9d525e140ull)
#define ASCON_HASHA_IV0 WORD_T(0x01470194fc6528a6ull)
#define ASCON_HASHA_IV1 WORD_T(0x738ec38ac0adffa7ull)
#define ASCON_HASHA_IV2 WORD_T(0x2ec8e3296c76384cull)
#define ASCON_HASHA_IV3 WORD_T(0xd6f6a54d7f52377dull)
#define ASCON_HASHA_IV4 WORD_T(0xa13c42a223be8d87ull)
#define ASCON_XOF_IV0 WORD_T(0xb57e273b814cd416ull)
#define ASCON_XOF_IV1 WORD_T(0x2b51042562ae2420ull)
#define ASCON_XOF_IV2 WORD_T(0x66a3a7768ddf2218ull)
#define ASCON_XOF_IV3 WORD_T(0x5aad0a7a8153650cull)
#define ASCON_XOF_IV4 WORD_T(0x4f3e0e32539493b6ull)
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16
#define IV ASCON_128_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 6
#define PB P6
#endif
#if ASCON_RATE == 16
#define IV ASCON_128A_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 8
#define PB P8
#endif
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20
#define IV ASCON_80PQ_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 6
#define PB P6
#endif
#define ASCON_XOFA_IV0 WORD_T(0x44906568b77b9832ull)
#define ASCON_XOFA_IV1 WORD_T(0xcd8d6cae53455532ull)
#define ASCON_XOFA_IV2 WORD_T(0xf7b5212756422129ull)
#define ASCON_XOFA_IV3 WORD_T(0x246885e1de0d225bull)
#define ASCON_XOFA_IV4 WORD_T(0xa8cb5ce33449973full)
#define START(n) ((3 + (n)) << 4 | (12 - (n)))
#if ASCON_UNROLL_LOOPS
__forceinline void P12ROUNDS(state_t* s) {
ROUND(s, 0xf0);
ROUND(s, 0xe1);
ROUND(s, 0xd2);
ROUND(s, 0xc3);
ROUND(s, 0xb4);
ROUND(s, 0xa5);
ROUND(s, 0x96);
ROUND(s, 0x87);
ROUND(s, 0x78);
ROUND(s, 0x69);
ROUND(s, 0x5a);
ROUND(s, 0x4b);
}
__forceinline void P8ROUNDS(state_t* s) {
ROUND(s, 0xb4);
ROUND(s, 0xa5);
ROUND(s, 0x96);
ROUND(s, 0x87);
ROUND(s, 0x78);
ROUND(s, 0x69);
ROUND(s, 0x5a);
ROUND(s, 0x4b);
}
__forceinline void P6ROUNDS(state_t* s) {
ROUND(s, 0x96);
ROUND(s, 0x87);
ROUND(s, 0x78);
ROUND(s, 0x69);
ROUND(s, 0x5a);
ROUND(s, 0x4b);
#define RC(c) WORD_T(c)
/* Fully unrolled 12-round permutation on s. The round constants run from
 * 0xf0 down to 0x4b, decreasing by 0x0f each round. */
forceinline void P12ROUNDS(state_t* s) {
ROUND(s, RC(0xf0));
ROUND(s, RC(0xe1));
ROUND(s, RC(0xd2));
ROUND(s, RC(0xc3));
ROUND(s, RC(0xb4));
ROUND(s, RC(0xa5));
ROUND(s, RC(0x96));
ROUND(s, RC(0x87));
ROUND(s, RC(0x78));
ROUND(s, RC(0x69));
ROUND(s, RC(0x5a));
ROUND(s, RC(0x4b));
}
#else /* !ASCON_UNROLL_LOOPS */
__forceinline void P12ROUNDS(state_t* s) {
for (int i = START(12); i > 0x4a; i -= 0x0f) ROUND(s, i);
/* Fully unrolled 8-round permutation on s. The round constants run from
 * 0xb4 down to 0x4b, decreasing by 0x0f each round. */
forceinline void P8ROUNDS(state_t* s) {
ROUND(s, RC(0xb4));
ROUND(s, RC(0xa5));
ROUND(s, RC(0x96));
ROUND(s, RC(0x87));
ROUND(s, RC(0x78));
ROUND(s, RC(0x69));
ROUND(s, RC(0x5a));
ROUND(s, RC(0x4b));
}
__forceinline void P8ROUNDS(state_t* s) {
for (int i = START(8); i > 0x4a; i -= 0x0f) ROUND(s, i);
/* Fully unrolled 6-round permutation on s. The round constants run from
 * 0x96 down to 0x4b, decreasing by 0x0f each round. */
forceinline void P6ROUNDS(state_t* s) {
ROUND(s, RC(0x96));
ROUND(s, RC(0x87));
ROUND(s, RC(0x78));
ROUND(s, RC(0x69));
ROUND(s, RC(0x5a));
ROUND(s, RC(0x4b));
}
__forceinline void P6ROUNDS(state_t* s) {
for (int i = START(6); i > 0x4a; i -= 0x0f) ROUND(s, i);
/* Looped permutation: applies rounds to s with the round constant starting
 * at START(nr) and dropping by 0x0f per round, stopping once the constant is
 * no longer above 0x4a. */
forceinline void PROUNDS(state_t* s, int nr) {
  int rc = START(nr);
  while (rc > 0x4a) {
    ROUND(s, RC(rc));
    rc -= 0x0f;
  }
}
#endif
#if ASCON_INLINE_PERM
#if ASCON_INLINE_PERM && ASCON_UNROLL_LOOPS
__forceinline void P12(state_t* s) {
printstate(" permutation input", s);
P12ROUNDS(s);
/* Selects the unrolled round sequence matching nr. Only 12, 8 and 6 are
 * recognised; any other nr leaves the state untouched. */
forceinline void P(state_t* s, int nr) {
  switch (nr) {
    case 12:
      P12ROUNDS(s);
      break;
    case 8:
      P8ROUNDS(s);
      break;
    case 6:
      P6ROUNDS(s);
      break;
    default:
      break;
  }
}
__forceinline void P8(state_t* s) {
printstate(" permutation input", s);
P8ROUNDS(s);
}
#elif !ASCON_INLINE_PERM && ASCON_UNROLL_LOOPS
__forceinline void P6(state_t* s) {
printstate(" permutation input", s);
P6ROUNDS(s);
}
void P12(state_t* s);
void P8(state_t* s);
void P6(state_t* s);
__forceinline void P(state_t* s, int i) {
if (i == 12) P12(s);
if (i == 8) P8(s);
if (i == 6) P6(s);
/* Forwards to the out-of-line permutation matching nr. Only 12, 8 and 6 are
 * recognised; any other nr leaves the state untouched. */
forceinline void P(state_t* s, int nr) {
  switch (nr) {
    case 12:
      P12(s);
      break;
    case 8:
      P8(s);
      break;
    case 6:
      P6(s);
      break;
    default:
      break;
  }
}
#elif ASCON_SINGLE_PERM
#define P12(s) P(s, 12)
#define P8(s) P(s, 8)
#define P6(s) P(s, 6)
#elif ASCON_INLINE_PERM && !ASCON_UNROLL_LOOPS
void P(state_t* s, uint8_t rounds);
forceinline void P(state_t* s, int nr) { PROUNDS(s, nr); }
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */
#else /* !ASCON_INLINE_PERM && !ASCON_UNROLL_LOOPS */
void P12(state_t* s);
void P8(state_t* s);
void P6(state_t* s);
__forceinline void P(state_t* s, int i) {
if (i == 12) P12(s);
if (i == 8) P8(s);
if (i == 6) P6(s);
}
void P(state_t* s, int nr);
#endif
......
#ifndef PRINTSTATE_H_
#define PRINTSTATE_H_
#ifdef NDEBUG
#ifdef ASCON_PRINTSTATE
#define printword(text, w)
#define printstate(text, s)
#include "ascon.h"
#include "word.h"
#else
void printword(const char* text, const word_t x);
void printstate(const char* text, const state_t* s);
#include <inttypes.h>
#include <stdio.h>
#else
#include "ascon.h"
#include "word.h"
#define printword(text, w) \
do { \
} while (0)
/* Prints "<text>=<value>" followed by a newline, with the word rendered as
 * 16 zero-padded lowercase hex digits. */
__forceinline void printword(const char* text, const word_t x) {
  const uint64_t value = WORDTOU64(x);
  printf("%s=%016" PRIx64 "\n", text, value);
}
__forceinline void printstate(const char* text, const state_t* s) {
printf("%s:\n", text);
printword(" x0", s->x0);
printword(" x1", s->x1);
printword(" x2", s->x2);
printword(" x3", s->x3);
printword(" x4", s->x4);
}
#define printstate(text, s) \
do { \
} while (0)
#endif
......
......@@ -4,13 +4,13 @@
#include "ascon.h"
#include "printstate.h"
__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) {
forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) {
*K0 = WORD_T(0);
*K1 = WORD_T(0);
*K2 = WORD_T(0);
}
__forceinline void PINIT(state_t* s) {
forceinline void PINIT(state_t* s) {
s->x0 = WORD_T(0);
s->x1 = WORD_T(0);
s->x2 = WORD_T(0);
......@@ -18,51 +18,34 @@ __forceinline void PINIT(state_t* s) {
s->x4 = WORD_T(0);
}
__forceinline void ROUND(state_t* s, uint64_t C) {
forceinline void ROUND(state_t* s, word_t C) {
state_t t;
s->x2 ^= C;
s->x0 ^= s->x4;
s->x4 ^= s->x3;
s->x2 ^= s->x1;
t.x0 = s->x0;
t.x4 = s->x4;
t.x3 = s->x3;
t.x1 = s->x1;
t.x2 = s->x2;
s->x0 = t.x0 ^ (~t.x1 & t.x2);
s->x2 = t.x2 ^ (~t.x3 & t.x4);
s->x4 = t.x4 ^ (~t.x0 & t.x1);
s->x1 = t.x1 ^ (~t.x2 & t.x3);
s->x3 = t.x3 ^ (~t.x4 & t.x0);
s->x1 ^= s->x0;
t.x1 = s->x1;
s->x1 = ROR64(s->x1, 39);
s->x3 ^= s->x2;
t.x2 = s->x2;
s->x2 = ROR64(s->x2, 1);
t.x4 = s->x4;
t.x2 ^= s->x2;
s->x2 = ROR64(s->x2, 6 - 1);
t.x3 = s->x3;
t.x1 ^= s->x1;
s->x3 = ROR64(s->x3, 10);
s->x0 ^= s->x4;
s->x4 = ROR64(s->x4, 7);
t.x3 ^= s->x3;
s->x2 ^= t.x2;
s->x1 = ROR64(s->x1, 61 - 39);
t.x0 = s->x0;
s->x2 = ~s->x2;
s->x3 = ROR64(s->x3, 17 - 10);
t.x4 ^= s->x4;
s->x4 = ROR64(s->x4, 41 - 7);
s->x3 ^= t.x3;
s->x1 ^= t.x1;
s->x0 = ROR64(s->x0, 19);
s->x4 ^= t.x4;
t.x0 ^= s->x0;
s->x0 = ROR64(s->x0, 28 - 19);
s->x0 ^= t.x0;
/* round constant */
s->x2 = XOR(s->x2, C);
/* s-box layer */
s->x0 = XOR(s->x0, s->x4);
s->x4 = XOR(s->x4, s->x3);
s->x2 = XOR(s->x2, s->x1);
t.x0 = XOR(s->x0, AND(NOT(s->x1), s->x2));
t.x2 = XOR(s->x2, AND(NOT(s->x3), s->x4));
t.x4 = XOR(s->x4, AND(NOT(s->x0), s->x1));
t.x1 = XOR(s->x1, AND(NOT(s->x2), s->x3));
t.x3 = XOR(s->x3, AND(NOT(s->x4), s->x0));
t.x1 = XOR(t.x1, t.x0);
t.x3 = XOR(t.x3, t.x2);
t.x0 = XOR(t.x0, t.x4);
/* linear layer */
s->x2 = XOR(t.x2, ROR(t.x2, 6 - 1));
s->x3 = XOR(t.x3, ROR(t.x3, 17 - 10));
s->x4 = XOR(t.x4, ROR(t.x4, 41 - 7));
s->x0 = XOR(t.x0, ROR(t.x0, 28 - 19));
s->x1 = XOR(t.x1, ROR(t.x1, 61 - 39));
s->x2 = XOR(t.x2, ROR(s->x2, 1));
s->x3 = XOR(t.x3, ROR(s->x3, 10));
s->x4 = XOR(t.x4, ROR(s->x4, 7));
s->x0 = XOR(t.x0, ROR(s->x0, 19));
s->x1 = XOR(t.x1, ROR(s->x1, 39));
s->x2 = NOT(s->x2);
printstate(" round output", s);
}
......
......@@ -4,6 +4,7 @@
#include <stdint.h>
#include "endian.h"
#include "forceinline.h"
typedef uint64_t word_t;
......@@ -12,69 +13,57 @@ typedef uint64_t word_t;
#define U64TOWORD
#define WORDTOU64
__forceinline word_t ROR64(word_t x, int n) { return x >> n | x << (64 - n); }
forceinline word_t ROR(word_t x, int n) { return x >> n | x << (64 - n); }
__forceinline word_t NOT(word_t a) { return ~a; }
forceinline word_t NOT(word_t a) { return ~a; }
__forceinline word_t XOR(word_t a, word_t b) { return a ^ b; }
forceinline word_t XOR(word_t a, word_t b) { return a ^ b; }
__forceinline word_t AND(word_t a, word_t b) { return a & b; }
forceinline word_t AND(word_t a, word_t b) { return a & b; }
__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) {
forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) {
return lo2hi << 32 | hi2lo >> 32;
}
__forceinline uint8_t NOTZERO(word_t a, word_t b) {
forceinline int NOTZERO(word_t a, word_t b) {
uint64_t result = a | b;
result |= result >> 32;
result |= result >> 16;
result |= result >> 8;
return (uint8_t)result;
return ((((int)(result & 0xff) - 1) >> 8) & 1) - 1;
}
__forceinline word_t PAD(int i) { return WORD_T(0x80ull << (56 - 8 * i)); }
forceinline word_t PAD(int i) { return 0x80ull << (56 - 8 * i); }
__forceinline word_t CLEAR(word_t w, int n) {
forceinline word_t CLEAR(word_t w, int n) {
/* undefined for n == 0 */
uint64_t mask = 0x00ffffffffffffffull >> (n * 8 - 8);
return AND(w, WORD_T(mask));
return w & mask;
}
__forceinline uint64_t MASK(int n) {
forceinline uint64_t MASK(int n) {
/* undefined for n == 0 */
return ~0ull >> (64 - 8 * n);
}
__forceinline word_t LOAD64(const uint8_t* bytes) {
uint64_t x = *(uint64_t*)bytes;
return U64TOWORD(U64BIG(x));
}
__forceinline void STORE64(uint8_t* bytes, word_t w) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes = U64BIG(x);
}
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = *(uint64_t*)bytes & MASK(n);
return U64TOWORD(U64BIG(x));
return U64BIG(x);
}
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
forceinline void STORE(uint8_t* bytes, word_t w, int n) {
*(uint64_t*)bytes &= ~MASK(n);
*(uint64_t*)bytes |= U64BIG(x);
*(uint64_t*)bytes |= U64BIG(w);
}
__forceinline word_t LOADBYTES(const uint8_t* bytes, int n) {
forceinline word_t LOADBYTES(const uint8_t* bytes, int n) {
uint64_t x = 0;
for (int i = 0; i < n; ++i) ((uint8_t*)&x)[7 - i] = bytes[i];
return U64TOWORD(x);
return x;
}
__forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&x)[7 - i];
forceinline void STOREBYTES(uint8_t* bytes, word_t w, int n) {
for (int i = 0; i < n; ++i) bytes[i] = ((uint8_t*)&w)[7 - i];
}
#endif /* WORD_H_ */
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment