Commit ec7e6d42 by Sebastian Renner

Removed shared implementations (by request)

parent 9f8820cc
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#define ASCON_RATE 16
#include "ascon.h"
#include "api.h"
#include "loadstore.h"
#include "permutations.h"
#include "printstate.h"
__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2,
const uint8_t* k) {
KINIT(K0, K1, K2);
if (CRYPTO_KEYBYTES == 20) {
XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4)));
k += 4;
}
XOR(*K1, LOAD64(k));
XOR(*K2, LOAD64(k + 8));
}
__forceinline void init(state_t* s, const uint8_t* npub, const uint8_t* k) {
word_t N0, N1;
word_t K0, K1, K2;
/* load nonce */
N0 = LOAD64(npub);
N1 = LOAD64(npub + 8);
/* load key */
loadkey(&K0, &K1, &K2, k);
/* initialization */
PINIT(s);
XOR(s->x0, IV);
if (CRYPTO_KEYBYTES == 20) XOR(s->x0, K0);
XOR(s->x1, K1);
XOR(s->x2, K2);
XOR(s->x3, N0);
XOR(s->x4, N1);
P12(s);
if (CRYPTO_KEYBYTES == 20) XOR(s->x2, K0);
XOR(s->x3, K1);
XOR(s->x4, K2);
printstate("initialization", s);
}
__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) {
word_t* restrict px;
/* process associated data */
if (adlen) {
while (adlen >= ASCON_RATE) {
XOR(s->x0, LOAD64(ad));
if (ASCON_RATE == 16) XOR(s->x1, LOAD64(ad + 8));
PB(s);
ad += ASCON_RATE;
adlen -= ASCON_RATE;
}
/* final associated data block */
px = &s->x0;
if (ASCON_RATE == 16 && adlen >= 8) {
XOR(s->x0, LOAD64(ad));
px = &s->x1;
ad += 8;
adlen -= 8;
}
if (adlen) XOR(*px, LOAD(ad, adlen));
XOR(*px, PAD(adlen));
PB(s);
}
XOR(s->x4, WORD_T(1));
printstate("process associated data", s);
}
__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m,
uint64_t mlen) {
word_t* restrict px;
/* process plaintext */
while (mlen >= ASCON_RATE) {
XOR(s->x0, LOAD64(m));
STORE64(c, s->x0);
if (ASCON_RATE == 16) {
XOR(s->x1, LOAD64(m + 8));
STORE64(c + 8, s->x1);
}
PB(s);
m += ASCON_RATE;
c += ASCON_RATE;
mlen -= ASCON_RATE;
}
/* final plaintext block */
px = &s->x0;
if (ASCON_RATE == 16 && mlen >= 8) {
XOR(s->x0, LOAD64(m));
STORE64(c, s->x0);
px = &s->x1;
m += 8;
c += 8;
mlen -= 8;
}
if (mlen) {
XOR(*px, LOAD(m, mlen));
STORE(c, *px, mlen);
}
XOR(*px, PAD(mlen));
printstate("process plaintext", s);
}
__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c,
uint64_t clen) {
word_t* restrict px;
word_t cx;
/* process ciphertext */
while (clen >= ASCON_RATE) {
cx = LOAD64(c);
XOR(s->x0, cx);
STORE64(m, s->x0);
s->x0 = cx;
if (ASCON_RATE == 16) {
cx = LOAD64(c + 8);
XOR(s->x1, cx);
STORE64(m + 8, s->x1);
s->x1 = cx;
}
PB(s);
m += ASCON_RATE;
c += ASCON_RATE;
clen -= ASCON_RATE;
}
/* final ciphertext block */
px = &s->x0;
if (ASCON_RATE == 16 && clen >= 8) {
cx = LOAD64(c);
XOR(s->x0, cx);
STORE64(m, s->x0);
s->x0 = cx;
px = &s->x1;
m += 8;
c += 8;
clen -= 8;
}
if (clen) {
cx = LOAD(c, clen);
XOR(*px, cx);
STORE(m, *px, clen);
AND(*px, XMASK(clen));
XOR(*px, cx);
}
XOR(*px, PAD(clen));
printstate("process ciphertext", s);
}
__forceinline void final(state_t* s, const uint8_t* k) {
word_t K0, K1, K2;
/* load key */
loadkey(&K0, &K1, &K2, k);
/* finalization */
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) {
XOR(s->x1, K1);
XOR(s->x2, K2);
}
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) {
XOR(s->x2, K1);
XOR(s->x3, K2);
}
if (CRYPTO_KEYBYTES == 20) {
XOR(s->x1, KEYROT(K0, K1));
XOR(s->x2, KEYROT(K1, K2));
XOR(s->x3, KEYROT(K2, WORD_T(0)));
}
P12(s);
XOR(s->x3, K1);
XOR(s->x4, K2);
printstate("finalization", s);
}
#if !ASCON_INLINE_MODE
#define INIT ascon_init
#define ABSORB ascon_absorb
#define ENCRYPT ascon_encrypt
#define DECRYPT ascon_decrypt
#define FINAL ascon_final
void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) {
init(s, npub, k);
}
void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) {
absorb(s, ad, adlen);
}
void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) {
encrypt(s, c, m, mlen);
}
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) {
decrypt(s, m, c, clen);
}
void ascon_final(state_t* s, const uint8_t* k) { final(s, k); }
#else
#define INIT init
#define ABSORB absorb
#define ENCRYPT encrypt
#define DECRYPT decrypt
#define FINAL final
#endif
int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m,
uint64_t mlen, const uint8_t* ad, uint64_t adlen,
const uint8_t* nsec, const uint8_t* npub,
const uint8_t* k) {
state_t s;
(void)nsec;
*clen = mlen + CRYPTO_ABYTES;
/* perform ascon computation */
INIT(&s, npub, k);
ABSORB(&s, ad, adlen);
ENCRYPT(&s, c, m, mlen);
FINAL(&s, k);
/* set tag */
c += mlen;
STORE64(c, s.x3);
STORE64(c + 8, s.x4);
return 0;
}
int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec,
const uint8_t* c, uint64_t clen, const uint8_t* ad,
uint64_t adlen, const uint8_t* npub, const uint8_t* k) {
state_t s;
(void)nsec;
if (clen < CRYPTO_ABYTES) {
*mlen = 0;
return -1;
}
*mlen = clen = clen - CRYPTO_ABYTES;
/* perform ascon computation */
INIT(&s, npub, k);
ABSORB(&s, ad, adlen);
DECRYPT(&s, m, c, clen);
FINAL(&s, k);
/* verify tag (should be constant time, check compiler output) */
c += clen;
XOR(s.x3, LOAD64(c));
XOR(s.x4, LOAD64(c + 8));
if (NOTZERO(s.x3, s.x4)) {
*mlen = 0;
return -1;
}
return 0;
}
#ifndef ASCON_H_
#define ASCON_H_
#include <stdint.h>
#include "config.h"
#include "word.h"
typedef struct {
word_t x0, x1, x2, x3, x4;
word_t rx;
} state_t;
void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k);
void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen);
void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen);
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen);
void ascon_final(state_t* s, const uint8_t* k);
#endif // ASCON_H_
#ifndef CONFIG_H_
#define CONFIG_H_
/* inline the Ascon mode */
#ifndef ASCON_INLINE_MODE
#define ASCON_INLINE_MODE 1
#endif
/* inline the Ascon permutations */
#ifndef ASCON_INLINE_PERM
#define ASCON_INLINE_PERM 0
#endif
/* single function for all permutations */
#ifndef ASCON_SINGLE_PERM
#define ASCON_SINGLE_PERM 0
#endif
/* unroll the permutation loops */
#ifndef ASCON_UNROLL_LOOPS
#define ASCON_UNROLL_LOOPS 0
#endif
/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */
#ifndef ASCON_DATA_ACCESS
#define ASCON_DATA_ACCESS 'B'
#endif
/* Ascon mask generator { 'S'tdlib, 'X'orshift, 'R'andombytes } */
#ifndef ASCON_MASK_RNG
#define ASCON_MASK_RNG 'X'
#endif
/* mask key/data loads */
#ifndef ASCON_MASK_LOADS
#define ASCON_MASK_LOADS 0
#endif
/* make sure __forceinline is supported */
#ifndef __forceinline
#define __forceinline inline __attribute__((always_inline))
#endif
#endif /* CONFIG_H_ */
#ifndef ENDIAN_H_
#define ENDIAN_H_
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
/* macros for big endian machines */
#ifndef NDEBUG
#pragma message("Using macros for big endian machines")
#endif
#define U64BIG(x) (x)
#define U32BIG(x) (x)
#define U16BIG(x) (x)
#elif defined(_MSC_VER) || \
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
/* macros for little endian machines */
#ifndef NDEBUG
#pragma message("Using macros for little endian machines")
#endif
#define U64BIG(x) \
(((0x00000000000000FFULL & (x)) << 56) | \
((0x000000000000FF00ULL & (x)) << 40) | \
((0x0000000000FF0000ULL & (x)) << 24) | \
((0x00000000FF000000ULL & (x)) << 8) | \
((0x000000FF00000000ULL & (x)) >> 8) | \
((0x0000FF0000000000ULL & (x)) >> 24) | \
((0x00FF000000000000ULL & (x)) >> 40) | \
((0xFF00000000000000ULL & (x)) >> 56))
#define U32BIG(x) \
(((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \
((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24))
#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8))
#else
#error "Ascon byte order macros not defined in endian.h"
#endif
#endif /* ENDIAN_H_ */
#ifndef LOADSTORE_H_
#define LOADSTORE_H_
#include <stdint.h>
#include "config.h"
#include "endian.h"
#include "word.h"
/* 64-bit LSB mask (undefined for n == 0) */
#define MASK(n) (~0ull >> (64 - (n)))
/* get byte from Ascon 64-bit word */
#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i))))
/* set byte in Ascon 64-bit word */
#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i)))
#if ASCON_DATA_ACCESS == 'W'
#ifndef NDEBUG
#pragma message("Using wordwise data access")
#endif
__forceinline word_t LOAD64(const uint8_t* bytes) {
uint64_t x = U64BIG(*(uint64_t*)bytes);
return U64TOWORD(x);
}
__forceinline void STORE64(uint8_t* bytes, word_t w) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes = U64BIG(x);
}
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = U64BIG(*(uint64_t*)bytes & MASK(8 * n));
return U64TOWORD(x);
}
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes &= ~MASK(8 * n);
*(uint64_t*)bytes |= U64BIG(x);
}
#elif ASCON_DATA_ACCESS == 'M'
#ifndef NDEBUG
#pragma message("Using memcpy to access data")
#endif
#include <string.h>
#define LOAD64(bytes) LOAD(bytes, 8)
#define STORE64(bytes, w) STORE(bytes, w, 8)
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = 0;
memcpy((uint8_t*)&x, bytes, n);
return U64TOWORD(U64BIG(x));
}
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = U64BIG(WORDTOU64(w));
memcpy(bytes, (uint8_t*)&x, n);
}
#elif ASCON_DATA_ACCESS == 'B'
#ifndef NDEBUG
#pragma message("Using bytewise data access")
#endif
#define LOAD64(bytes) LOAD(bytes, 8)
#define STORE64(bytes, w) STORE(bytes, w, 8)
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = 0;
for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i;
return U64TOWORD(x);
}
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i;
}
#elif ASCON_DATA_ACCESS == 'H'
#ifndef NDEBUG
#pragma message("Using hybrid data access")
#endif
#define LOAD64(bytes) LOAD(bytes, 8)
#define STORE64(bytes, w) STORE(bytes, w, 8)
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = 0;
if (n == 8)
x = U64BIG(*(uint64_t*)bytes);
else
for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i;
return U64TOWORD(x);
}
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
if (n == 8)
*(uint64_t*)bytes = U64BIG(x);
else
for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i;
}
#else
#error "Ascon data access macro not defined correctly"
#endif
#endif /* LOADSTORE_H_ */
#include "permutations.h"
#include "round.h"
#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM
const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9},
{0x6, 0xc}, {0x3, 0xc}, {0x6, 0x9}, {0x3, 0x9},
{0xc, 0x6}, {0x9, 0x6}, {0xc, 0x3}, {0x9, 0x3}};
#endif
#if ASCON_INLINE_PERM
#elif ASCON_SINGLE_PERM
void P(state_t* s, uint8_t rounds) {
printstate(" permutation input", s);
for (int i = START(rounds); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
}
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */
void P12(state_t* s) { P12ROUNDS(s); }
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16
void P8(state_t* s) { P8ROUNDS(s); }
#endif
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8
void P6(state_t* s) { P6ROUNDS(s); }
#endif
#endif
#ifndef PERMUTATIONS_H_
#define PERMUTATIONS_H_
#include <stdint.h>
#include "api.h"
#include "ascon.h"
#include "printstate.h"
#include "round.h"
#define ASCON_128_KEYBYTES 16
#define ASCON_128A_KEYBYTES 16
#define ASCON_80PQ_KEYBYTES 20
#define ASCON_128_RATE 8
#define ASCON_128A_RATE 16
#define ASCON_128_PA_ROUNDS 12
#define ASCON_128_PB_ROUNDS 6
#define ASCON_128A_PB_ROUNDS 8
#define ASCON_HASH_BYTES 32
#define ASCON_128_IV \
U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \
((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_128_PB_ROUNDS) << 32))
#define ASCON_128A_IV \
U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \
((uint64_t)(ASCON_128A_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_128A_PB_ROUNDS) << 32))
#define ASCON_80PQ_IV \
U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \
((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_128_PB_ROUNDS) << 32))
#define ASCON_HASH_IV \
U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_HASH_BYTES * 8) << 0))
#define ASCON_XOF_IV \
U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40))
#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull)
#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull)
#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull)
#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull)
#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull)
#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull)
#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull)
#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull)
#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull)
#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull)
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16
#define IV ASCON_128_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 6
#define PB P6
#endif
#if ASCON_RATE == 16
#define IV ASCON_128A_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 8
#define PB P8
#endif
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20
#define IV ASCON_80PQ_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 6
#define PB P6
#endif
#define START(n) (12 - n)
#if ASCON_UNROLL_LOOPS
__forceinline void P12ROUNDS(state_t* s) {
ROUND(s, 0xc, 0xc);
ROUND(s, 0x9, 0xc);
ROUND(s, 0xc, 0x9);
ROUND(s, 0x9, 0x9);
ROUND(s, 0x6, 0xc);
ROUND(s, 0x3, 0xc);
ROUND(s, 0x6, 0x9);
ROUND(s, 0x3, 0x9);
ROUND(s, 0xc, 0x6);
ROUND(s, 0x9, 0x6);
ROUND(s, 0xc, 0x3);
ROUND(s, 0x9, 0x3);
}
__forceinline void P8ROUNDS(state_t* s) {
ROUND(s, 0x6, 0xc);
ROUND(s, 0x3, 0xc);
ROUND(s, 0x6, 0x9);
ROUND(s, 0x3, 0x9);
ROUND(s, 0xc, 0x6);
ROUND(s, 0x9, 0x6);
ROUND(s, 0xc, 0x3);
ROUND(s, 0x9, 0x3);
}
__forceinline void P6ROUNDS(state_t* s) {
ROUND(s, 0x6, 0x9);
ROUND(s, 0x3, 0x9);
ROUND(s, 0xc, 0x6);
ROUND(s, 0x9, 0x6);
ROUND(s, 0xc, 0x3);
ROUND(s, 0x9, 0x3);
}
#else /* !ASCON_UNROLL_LOOPS */
extern const uint8_t constants[][2];
__forceinline void P12ROUNDS(state_t* s) {
for (int i = START(12); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
}
__forceinline void P8ROUNDS(state_t* s) {
for (int i = START(8); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
}
__forceinline void P6ROUNDS(state_t* s) {
for (int i = START(6); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
}
#endif
#if ASCON_INLINE_PERM
__forceinline void P12(state_t* s) {
printstate(" permutation input", s);
P12ROUNDS(s);
}
__forceinline void P8(state_t* s) {
printstate(" permutation input", s);
P8ROUNDS(s);
}
__forceinline void P6(state_t* s) {
printstate(" permutation input", s);
P6ROUNDS(s);
}
__forceinline void P(state_t* s, int i) {
if (i == 12) P12(s);
if (i == 8) P8(s);
if (i == 6) P6(s);
}
#elif ASCON_SINGLE_PERM
#define P12(s) P(s, 12)
#define P8(s) P(s, 8)
#define P6(s) P(s, 6)
void P(state_t* s, uint8_t rounds);
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */
void P12(state_t* s);
void P8(state_t* s);
void P6(state_t* s);
__forceinline void P(state_t* s, int i) {
if (i == 12) P12(s);
if (i == 8) P8(s);
if (i == 6) P6(s);
}
#endif
#endif /* PERMUTATIONS_H_ */
#ifndef PRINTSTATE_H_
#define PRINTSTATE_H_
#ifdef NDEBUG
#define printword(text, w)
#define printstate(text, s)
#else
#include <inttypes.h>
#include <stdio.h>
#include "ascon.h"
__forceinline void printword(const char* text, const word_t x) {
printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x));
}
__forceinline void printstate(const char* text, const state_t* s) {
printf("%s:\n", text);
printword(" x0", s->x0);
printword(" x1", s->x1);
printword(" x2", s->x2);
printword(" x3", s->x3);
printword(" x4", s->x4);
}
#endif
#endif /* PRINTSTATE_H_ */
#include "random.h"
#include <stdlib.h>
#include <time.h>
#include "config.h"
#if ASCON_MASK_RNG == 'R'
void randinit() { srand(time(0)); }
uint32_t rand32() {
uint32_t r;
randombytes(&r, 4);
return r;
}
uint64_t rand64() {
uint64_t r;
randombytes(&r, 8);
return r;
}
#elif ASCON_MASK_RNG == 'S'
void randinit() { srand(time(0)); }
uint32_t rand32() { return ((uint32_t)rand() << 21) ^ rand(); }
uint64_t rand64() {
return ((uint64_t)rand() << 43) ^ ((uint64_t)rand() << 21) ^ rand();
}
#elif ASCON_MASK_RNG == 'X'
uint32_t xorshift32;
uint64_t xorshift64;
void randinit() {
srand(time(0));
xorshift32 = rand();
xorshift64 = (uint64_t)rand() << 32 | rand();
}
uint32_t rand32() {
uint32_t x = xorshift32;
x ^= x << 13;
x ^= x >> 17;
x ^= x << 5;
return xorshift32 = x;
}
uint64_t rand64() {
uint64_t x = xorshift64;
x ^= x << 13;
x ^= x >> 7;
x ^= x << 17;
return xorshift64 = x;
}
#endif
#ifndef RANDOM_H_
#define RANDOM_H_
#include <stdint.h>
void randinit();
uint32_t rand32();
uint64_t rand64();
#endif /* RANDOM_H_ */
#ifndef ROUND_H_
#define ROUND_H_
#include "ascon.h"
#include "printstate.h"
#include "random.h"
__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) {
*K0 = TOSHARES(0);
*K1 = TOSHARES(0);
*K2 = TOSHARES(0);
}
__forceinline void PINIT(state_t* s) {
randinit();
s->x0 = TOSHARES(0);
s->x1 = TOSHARES(0);
s->x2 = TOSHARES(0);
s->x3 = TOSHARES(0);
s->x4 = TOSHARES(0);
s->rx = TOSHARES(0);
}
#define TOFFOLI(a0, a1, b0, b1, c0, c1) \
do { \
(a0) ^= (~(b0)) & (c1); \
(a0) ^= (~(b0)) & (c0); \
(a1) ^= (b1) & (c1); \
(a1) ^= (b1) & (c0); \
} while (0)
__forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) {
state_t t;
/* refresh randomness */
/* s->rx = TOSHARES(0); */
/* addition of round constant */
s->x2.s0.e ^= C_e;
/* substitution layer */
s->x0.s0.e ^= s->x4.s0.e;
s->x4.s0.e ^= s->x3.s0.e;
s->x2.s0.e ^= s->x1.s0.e;
s->x0.s1.e ^= s->x4.s1.e;
s->x4.s1.e ^= s->x3.s1.e;
s->x2.s1.e ^= s->x1.s1.e;
/* start of shared keccak s-box from https://eprint.iacr.org/2019/536 */
s->rx.s0.e = s->rx.s1.e;
TOFFOLI(s->rx.s0.e, s->rx.s1.e, s->x4.s0.e, s->x4.s1.e, s->x0.s0.e,
s->x0.s1.e);
TOFFOLI(s->x0.s0.e, s->x0.s1.e, s->x1.s0.e, s->x1.s1.e, s->x2.s0.e,
s->x2.s1.e);
TOFFOLI(s->x2.s0.e, s->x2.s1.e, s->x3.s0.e, s->x3.s1.e, s->x4.s0.e,
s->x4.s1.e);
TOFFOLI(s->x4.s0.e, s->x4.s1.e, s->x0.s0.e, s->x0.s1.e, s->x1.s0.e,
s->x1.s1.e);
TOFFOLI(s->x1.s0.e, s->x1.s1.e, s->x2.s0.e, s->x2.s1.e, s->x3.s0.e,
s->x3.s1.e);
s->x3.s1.e ^= s->rx.s1.e;
s->x3.s0.e ^= s->rx.s0.e;
/* end of shared keccak s-box */
s->x1.s0.e ^= s->x0.s0.e;
s->x0.s0.e ^= s->x4.s0.e;
s->x3.s0.e ^= s->x2.s0.e;
s->x2.s0.e = ~s->x2.s0.e;
s->x1.s1.e ^= s->x0.s1.e;
s->x0.s1.e ^= s->x4.s1.e;
s->x3.s1.e ^= s->x2.s1.e;
/* addition of round constant */
s->x2.s0.o ^= C_o;
/* substitution layer */
s->x0.s0.o ^= s->x4.s0.o;
s->x4.s0.o ^= s->x3.s0.o;
s->x2.s0.o ^= s->x1.s0.o;
s->x0.s1.o ^= s->x4.s1.o;
s->x4.s1.o ^= s->x3.s1.o;
s->x2.s1.o ^= s->x1.s1.o;
/* start of shared keccak s-box from https://eprint.iacr.org/2019/536 */
s->rx.s0.o = s->rx.s1.o;
TOFFOLI(s->rx.s0.o, s->rx.s1.o, s->x4.s0.o, s->x4.s1.o, s->x0.s0.o,
s->x0.s1.o);
TOFFOLI(s->x0.s0.o, s->x0.s1.o, s->x1.s0.o, s->x1.s1.o, s->x2.s0.o,
s->x2.s1.o);
TOFFOLI(s->x2.s0.o, s->x2.s1.o, s->x3.s0.o, s->x3.s1.o, s->x4.s0.o,
s->x4.s1.o);
TOFFOLI(s->x4.s0.o, s->x4.s1.o, s->x0.s0.o, s->x0.s1.o, s->x1.s0.o,
s->x1.s1.o);
TOFFOLI(s->x1.s0.o, s->x1.s1.o, s->x2.s0.o, s->x2.s1.o, s->x3.s0.o,
s->x3.s1.o);
s->x3.s1.o ^= s->rx.s1.o;
s->x3.s0.o ^= s->rx.s0.o;
/* end of shared keccak s-box */
s->x1.s0.o ^= s->x0.s0.o;
s->x0.s0.o ^= s->x4.s0.o;
s->x3.s0.o ^= s->x2.s0.o;
s->x2.s0.o = ~s->x2.s0.o;
s->x1.s1.o ^= s->x0.s1.o;
s->x0.s1.o ^= s->x4.s1.o;
s->x3.s1.o ^= s->x2.s1.o;
/* linear diffusion layer */
t.x0.s1.e = s->x0.s1.e ^ ROR32(s->x0.s1.o, 4);
t.x0.s1.o = s->x0.s1.o ^ ROR32(s->x0.s1.e, 5);
t.x1.s1.e = s->x1.s1.e ^ ROR32(s->x1.s1.e, 11);
t.x1.s1.o = s->x1.s1.o ^ ROR32(s->x1.s1.o, 11);
t.x2.s1.e = s->x2.s1.e ^ ROR32(s->x2.s1.o, 2);
t.x2.s1.o = s->x2.s1.o ^ ROR32(s->x2.s1.e, 3);
t.x3.s1.e = s->x3.s1.e ^ ROR32(s->x3.s1.o, 3);
t.x3.s1.o = s->x3.s1.o ^ ROR32(s->x3.s1.e, 4);
t.x4.s1.e = s->x4.s1.e ^ ROR32(s->x4.s1.e, 17);
t.x4.s1.o = s->x4.s1.o ^ ROR32(s->x4.s1.o, 17);
s->x0.s1.e ^= ROR32(t.x0.s1.o, 9);
s->x0.s1.o ^= ROR32(t.x0.s1.e, 10);
s->x1.s1.e ^= ROR32(t.x1.s1.o, 19);
s->x1.s1.o ^= ROR32(t.x1.s1.e, 20);
s->x2.s1.e ^= t.x2.s1.o;
s->x2.s1.o ^= ROR32(t.x2.s1.e, 1);
s->x3.s1.e ^= ROR32(t.x3.s1.e, 5);
s->x3.s1.o ^= ROR32(t.x3.s1.o, 5);
s->x4.s1.e ^= ROR32(t.x4.s1.o, 3);
s->x4.s1.o ^= ROR32(t.x4.s1.e, 4);
t.x0.s0.e = s->x0.s0.e ^ ROR32(s->x0.s0.o, 4);
t.x0.s0.o = s->x0.s0.o ^ ROR32(s->x0.s0.e, 5);
t.x1.s0.e = s->x1.s0.e ^ ROR32(s->x1.s0.e, 11);
t.x1.s0.o = s->x1.s0.o ^ ROR32(s->x1.s0.o, 11);
t.x2.s0.e = s->x2.s0.e ^ ROR32(s->x2.s0.o, 2);
t.x2.s0.o = s->x2.s0.o ^ ROR32(s->x2.s0.e, 3);
t.x3.s0.e = s->x3.s0.e ^ ROR32(s->x3.s0.o, 3);
t.x3.s0.o = s->x3.s0.o ^ ROR32(s->x3.s0.e, 4);
t.x4.s0.e = s->x4.s0.e ^ ROR32(s->x4.s0.e, 17);
t.x4.s0.o = s->x4.s0.o ^ ROR32(s->x4.s0.o, 17);
s->x0.s0.e ^= ROR32(t.x0.s0.o, 9);
s->x0.s0.o ^= ROR32(t.x0.s0.e, 10);
s->x1.s0.e ^= ROR32(t.x1.s0.o, 19);
s->x1.s0.o ^= ROR32(t.x1.s0.e, 20);
s->x2.s0.e ^= t.x2.s0.o;
s->x2.s0.o ^= ROR32(t.x2.s0.e, 1);
s->x3.s0.e ^= ROR32(t.x3.s0.e, 5);
s->x3.s0.o ^= ROR32(t.x3.s0.o, 5);
s->x4.s0.e ^= ROR32(t.x4.s0.o, 3);
s->x4.s0.o ^= ROR32(t.x4.s0.e, 4);
printstate(" round output", s);
}
#endif /* ROUND_H_ */
#include "word.h"
/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
share_t TOBI32(share_t in) {
uint32_t r0, r1;
uint32_t lo = in.e;
uint32_t hi = in.o;
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1);
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2);
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4);
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8);
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1);
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2);
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4);
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8);
r0 = (lo & 0x0000FFFF) | (hi << 16);
r1 = (lo >> 16) | (hi & 0xFFFF0000);
in.e = r0;
in.o = r1;
return in;
}
/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
share_t FROMBI32(share_t in) {
uint32_t r0 = in.e;
uint32_t r1 = in.o;
uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16);
uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000);
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8);
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4);
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2);
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1);
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8);
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4);
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2);
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1);
in.e = lo;
in.o = hi;
return in;
}
#ifndef WORD_H_
#define WORD_H_
#include <stdint.h>
#include "config.h"
#include "random.h"
typedef struct {
uint32_t e;
uint32_t o;
} share_t;
typedef struct {
share_t s0;
share_t s1;
} word_t;
__forceinline word_t WORD_T(uint64_t x) {
word_t w;
w.s0.e = (uint32_t)x;
w.s0.o = x >> 32;
w.s1.e = 0;
w.s1.o = 0;
return w;
}
__forceinline uint64_t UINT64_T(word_t w) {
return (uint64_t)w.s0.o << 32 | w.s0.e;
}
share_t TOBI32(share_t in);
share_t FROMBI32(share_t in);
__forceinline word_t TOSHARES(uint64_t in) {
uint32_t r0 = rand32();
uint32_t r1 = rand32();
word_t w;
w.s0.e = (uint32_t)in ^ r0;
w.s0.o = (in >> 32) ^ r1;
w.s1.e = r0;
w.s1.o = r1;
return w;
}
__forceinline uint64_t FROMSHARES(word_t in) {
return (uint64_t)(in.s0.o ^ in.s1.o) << 32 | (in.s0.e ^ in.s1.e);
}
__forceinline word_t U64TOWORD(uint64_t x) {
#if ASCON_MASK_LOADS
word_t w = TOSHARES(x);
w.s0 = TOBI32(w.s0);
w.s1 = TOBI32(w.s1);
return w;
#else
word_t w = WORD_T(x);
w.s0 = TOBI32(w.s0);
return w;
#endif
}
__forceinline uint64_t WORDTOU64(word_t w) {
w.s0 = FROMBI32(w.s0);
w.s1 = FROMBI32(w.s1);
return (uint64_t)FROMSHARES(w);
}
#define XOR(a, b) \
do { \
word_t tb = b; \
(a).s0.e ^= tb.s0.e; \
(a).s0.o ^= tb.s0.o; \
(a).s1.e ^= tb.s1.e; \
(a).s1.o ^= tb.s1.o; \
} while (0)
#define AND(a, b) \
do { \
word_t ta = a; \
word_t tb = b; \
(a).s0.e = (ta.s0.e & tb.s0.e) ^ (ta.s0.e & tb.s1.e); \
(a).s0.o = (ta.s0.o & tb.s0.e) ^ (ta.s0.o & tb.s1.o); \
(a).s1.e = (ta.s1.e & tb.s0.e) ^ (ta.s1.e & tb.s1.e); \
(a).s1.o = (ta.s1.o & tb.s0.o) ^ (ta.s1.o & tb.s1.o); \
} while (0)
__forceinline uint32_t ROR32(uint32_t x, int n) {
return x >> n | x << (32 - n);
}
__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) {
word_t r;
r.s0.e = lo2hi.s0.e << 16 | hi2lo.s0.e >> 16;
r.s0.o = lo2hi.s0.o << 16 | hi2lo.s0.o >> 16;
r.s1.e = lo2hi.s1.e << 16 | hi2lo.s1.e >> 16;
r.s1.o = lo2hi.s1.o << 16 | hi2lo.s1.o >> 16;
return r;
}
__forceinline int NOTZERO(word_t a, word_t b) {
int result = 0;
for (int i = 0; i < 8; ++i)
result |= ((uint8_t*)&(a.s0))[i] ^ ((uint8_t*)&(a.s1))[i];
for (int i = 0; i < 8; ++i)
result |= ((uint8_t*)&(b.s0))[i] ^ ((uint8_t*)&(b.s1))[i];
return result;
}
/* set padding byte in 64-bit Ascon word */
__forceinline word_t PAD(int i) {
return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32);
}
/* byte mask for 64-bit Ascon word (1 <= n <= 8) */
__forceinline word_t XMASK(int n) {
uint32_t mask = 0x0fffffff >> (n * 4 - 4);
return WORD_T((uint64_t)mask << 32 | mask);
}
#endif /* WORD_H_ */
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#define ASCON_RATE 16
#include "ascon.h"
#include "api.h"
#include "loadstore.h"
#include "permutations.h"
#include "printstate.h"
__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2,
const uint8_t* k) {
KINIT(K0, K1, K2);
if (CRYPTO_KEYBYTES == 20) {
XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4)));
k += 4;
}
XOR(*K1, LOAD64(k));
XOR(*K2, LOAD64(k + 8));
}
__forceinline void init(state_t* s, const uint8_t* npub, const uint8_t* k) {
word_t N0, N1;
word_t K0, K1, K2;
/* load nonce */
N0 = LOAD64(npub);
N1 = LOAD64(npub + 8);
/* load key */
loadkey(&K0, &K1, &K2, k);
/* initialization */
PINIT(s);
XOR(s->x0, IV);
if (CRYPTO_KEYBYTES == 20) XOR(s->x0, K0);
XOR(s->x1, K1);
XOR(s->x2, K2);
XOR(s->x3, N0);
XOR(s->x4, N1);
P12(s);
if (CRYPTO_KEYBYTES == 20) XOR(s->x2, K0);
XOR(s->x3, K1);
XOR(s->x4, K2);
printstate("initialization", s);
}
__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) {
word_t* restrict px;
/* process associated data */
if (adlen) {
while (adlen >= ASCON_RATE) {
XOR(s->x0, LOAD64(ad));
if (ASCON_RATE == 16) XOR(s->x1, LOAD64(ad + 8));
PB(s);
ad += ASCON_RATE;
adlen -= ASCON_RATE;
}
/* final associated data block */
px = &s->x0;
if (ASCON_RATE == 16 && adlen >= 8) {
XOR(s->x0, LOAD64(ad));
px = &s->x1;
ad += 8;
adlen -= 8;
}
if (adlen) XOR(*px, LOAD(ad, adlen));
XOR(*px, PAD(adlen));
PB(s);
}
XOR(s->x4, WORD_T(1));
printstate("process associated data", s);
}
__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m,
uint64_t mlen) {
word_t* restrict px;
/* process plaintext */
while (mlen >= ASCON_RATE) {
XOR(s->x0, LOAD64(m));
STORE64(c, s->x0);
if (ASCON_RATE == 16) {
XOR(s->x1, LOAD64(m + 8));
STORE64(c + 8, s->x1);
}
PB(s);
m += ASCON_RATE;
c += ASCON_RATE;
mlen -= ASCON_RATE;
}
/* final plaintext block */
px = &s->x0;
if (ASCON_RATE == 16 && mlen >= 8) {
XOR(s->x0, LOAD64(m));
STORE64(c, s->x0);
px = &s->x1;
m += 8;
c += 8;
mlen -= 8;
}
if (mlen) {
XOR(*px, LOAD(m, mlen));
STORE(c, *px, mlen);
}
XOR(*px, PAD(mlen));
printstate("process plaintext", s);
}
__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c,
uint64_t clen) {
word_t* restrict px;
word_t cx;
/* process ciphertext */
while (clen >= ASCON_RATE) {
cx = LOAD64(c);
XOR(s->x0, cx);
STORE64(m, s->x0);
s->x0 = cx;
if (ASCON_RATE == 16) {
cx = LOAD64(c + 8);
XOR(s->x1, cx);
STORE64(m + 8, s->x1);
s->x1 = cx;
}
PB(s);
m += ASCON_RATE;
c += ASCON_RATE;
clen -= ASCON_RATE;
}
/* final ciphertext block */
px = &s->x0;
if (ASCON_RATE == 16 && clen >= 8) {
cx = LOAD64(c);
XOR(s->x0, cx);
STORE64(m, s->x0);
s->x0 = cx;
px = &s->x1;
m += 8;
c += 8;
clen -= 8;
}
if (clen) {
cx = LOAD(c, clen);
XOR(*px, cx);
STORE(m, *px, clen);
AND(*px, XMASK(clen));
XOR(*px, cx);
}
XOR(*px, PAD(clen));
printstate("process ciphertext", s);
}
__forceinline void final(state_t* s, const uint8_t* k) {
word_t K0, K1, K2;
/* load key */
loadkey(&K0, &K1, &K2, k);
/* finalization */
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) {
XOR(s->x1, K1);
XOR(s->x2, K2);
}
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) {
XOR(s->x2, K1);
XOR(s->x3, K2);
}
if (CRYPTO_KEYBYTES == 20) {
XOR(s->x1, KEYROT(K0, K1));
XOR(s->x2, KEYROT(K1, K2));
XOR(s->x3, KEYROT(K2, WORD_T(0)));
}
P12(s);
XOR(s->x3, K1);
XOR(s->x4, K2);
printstate("finalization", s);
}
#if !ASCON_INLINE_MODE
#define INIT ascon_init
#define ABSORB ascon_absorb
#define ENCRYPT ascon_encrypt
#define DECRYPT ascon_decrypt
#define FINAL ascon_final
void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) {
init(s, npub, k);
}
void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) {
absorb(s, ad, adlen);
}
void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) {
encrypt(s, c, m, mlen);
}
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) {
decrypt(s, m, c, clen);
}
void ascon_final(state_t* s, const uint8_t* k) { final(s, k); }
#else
#define INIT init
#define ABSORB absorb
#define ENCRYPT encrypt
#define DECRYPT decrypt
#define FINAL final
#endif
int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m,
uint64_t mlen, const uint8_t* ad, uint64_t adlen,
const uint8_t* nsec, const uint8_t* npub,
const uint8_t* k) {
state_t s;
(void)nsec;
*clen = mlen + CRYPTO_ABYTES;
/* perform ascon computation */
INIT(&s, npub, k);
ABSORB(&s, ad, adlen);
ENCRYPT(&s, c, m, mlen);
FINAL(&s, k);
/* set tag */
c += mlen;
STORE64(c, s.x3);
STORE64(c + 8, s.x4);
return 0;
}
int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec,
const uint8_t* c, uint64_t clen, const uint8_t* ad,
uint64_t adlen, const uint8_t* npub, const uint8_t* k) {
state_t s;
(void)nsec;
if (clen < CRYPTO_ABYTES) {
*mlen = 0;
return -1;
}
*mlen = clen = clen - CRYPTO_ABYTES;
/* perform ascon computation */
INIT(&s, npub, k);
ABSORB(&s, ad, adlen);
DECRYPT(&s, m, c, clen);
FINAL(&s, k);
/* verify tag (should be constant time, check compiler output) */
c += clen;
XOR(s.x3, LOAD64(c));
XOR(s.x4, LOAD64(c + 8));
if (NOTZERO(s.x3, s.x4)) {
*mlen = 0;
return -1;
}
return 0;
}
#ifndef ASCON_H_
#define ASCON_H_
#include <stdint.h>
#include "config.h"
#include "word.h"
typedef struct {
word_t x0, x1, x2, x3, x4;
word_t rx;
} state_t;
void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k);
void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen);
void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen);
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen);
void ascon_final(state_t* s, const uint8_t* k);
#endif // ASCON_H_
#ifndef CONFIG_H_
#define CONFIG_H_
/* inline the Ascon mode */
#ifndef ASCON_INLINE_MODE
#define ASCON_INLINE_MODE 1
#endif
/* inline the Ascon permutations */
#ifndef ASCON_INLINE_PERM
#define ASCON_INLINE_PERM 0
#endif
/* single function for all permutations */
#ifndef ASCON_SINGLE_PERM
#define ASCON_SINGLE_PERM 0
#endif
/* unroll the permutation loops */
#ifndef ASCON_UNROLL_LOOPS
#define ASCON_UNROLL_LOOPS 0
#endif
/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */
#ifndef ASCON_DATA_ACCESS
#define ASCON_DATA_ACCESS 'B'
#endif
/* Ascon mask generator { 'S'tdlib, 'X'orshift, 'R'andombytes } */
#ifndef ASCON_MASK_RNG
#define ASCON_MASK_RNG 'X'
#endif
/* mask key/data loads */
#ifndef ASCON_MASK_LOADS
#define ASCON_MASK_LOADS 0
#endif
/* make sure __forceinline is supported */
#ifndef __forceinline
#define __forceinline inline __attribute__((always_inline))
#endif
#endif /* CONFIG_H_ */
#ifndef ENDIAN_H_
#define ENDIAN_H_
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
/* macros for big endian machines */
#ifndef NDEBUG
#pragma message("Using macros for big endian machines")
#endif
#define U64BIG(x) (x)
#define U32BIG(x) (x)
#define U16BIG(x) (x)
#elif defined(_MSC_VER) || \
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
/* macros for little endian machines */
#ifndef NDEBUG
#pragma message("Using macros for little endian machines")
#endif
#define U64BIG(x) \
(((0x00000000000000FFULL & (x)) << 56) | \
((0x000000000000FF00ULL & (x)) << 40) | \
((0x0000000000FF0000ULL & (x)) << 24) | \
((0x00000000FF000000ULL & (x)) << 8) | \
((0x000000FF00000000ULL & (x)) >> 8) | \
((0x0000FF0000000000ULL & (x)) >> 24) | \
((0x00FF000000000000ULL & (x)) >> 40) | \
((0xFF00000000000000ULL & (x)) >> 56))
#define U32BIG(x) \
(((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \
((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24))
#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8))
#else
#error "Ascon byte order macros not defined in endian.h"
#endif
#endif /* ENDIAN_H_ */
#ifndef LOADSTORE_H_
#define LOADSTORE_H_
#include <stdint.h>
#include "config.h"
#include "endian.h"
#include "word.h"
/* 64-bit LSB mask (undefined for n == 0) */
#define MASK(n) (~0ull >> (64 - (n)))
/* get byte from Ascon 64-bit word */
#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i))))
/* set byte in Ascon 64-bit word */
#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i)))
#if ASCON_DATA_ACCESS == 'W'
#ifndef NDEBUG
#pragma message("Using wordwise data access")
#endif
__forceinline word_t LOAD64(const uint8_t* bytes) {
uint64_t x = U64BIG(*(uint64_t*)bytes);
return U64TOWORD(x);
}
__forceinline void STORE64(uint8_t* bytes, word_t w) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes = U64BIG(x);
}
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = U64BIG(*(uint64_t*)bytes & MASK(8 * n));
return U64TOWORD(x);
}
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes &= ~MASK(8 * n);
*(uint64_t*)bytes |= U64BIG(x);
}
#elif ASCON_DATA_ACCESS == 'M'
#ifndef NDEBUG
#pragma message("Using memcpy to access data")
#endif
#include <string.h>
#define LOAD64(bytes) LOAD(bytes, 8)
#define STORE64(bytes, w) STORE(bytes, w, 8)
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = 0;
memcpy((uint8_t*)&x, bytes, n);
return U64TOWORD(U64BIG(x));
}
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = U64BIG(WORDTOU64(w));
memcpy(bytes, (uint8_t*)&x, n);
}
#elif ASCON_DATA_ACCESS == 'B'
#ifndef NDEBUG
#pragma message("Using bytewise data access")
#endif
#define LOAD64(bytes) LOAD(bytes, 8)
#define STORE64(bytes, w) STORE(bytes, w, 8)
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = 0;
for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i;
return U64TOWORD(x);
}
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i;
}
#elif ASCON_DATA_ACCESS == 'H'
#ifndef NDEBUG
#pragma message("Using hybrid data access")
#endif
#define LOAD64(bytes) LOAD(bytes, 8)
#define STORE64(bytes, w) STORE(bytes, w, 8)
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = 0;
if (n == 8)
x = U64BIG(*(uint64_t*)bytes);
else
for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i;
return U64TOWORD(x);
}
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
if (n == 8)
*(uint64_t*)bytes = U64BIG(x);
else
for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i;
}
#else
#error "Ascon data access macro not defined correctly"
#endif
#endif /* LOADSTORE_H_ */
#include "permutations.h"
#include "round.h"
#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM
const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9},
{0x6, 0xc}, {0x3, 0xc}, {0x6, 0x9}, {0x3, 0x9},
{0xc, 0x6}, {0x9, 0x6}, {0xc, 0x3}, {0x9, 0x3}};
#endif
#if ASCON_INLINE_PERM
#elif ASCON_SINGLE_PERM
void P(state_t* s, uint8_t rounds) {
printstate(" permutation input", s);
for (int i = START(rounds); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
}
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */
void P12(state_t* s) { P12ROUNDS(s); }
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16
void P8(state_t* s) { P8ROUNDS(s); }
#endif
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8
void P6(state_t* s) { P6ROUNDS(s); }
#endif
#endif
#ifndef PERMUTATIONS_H_
#define PERMUTATIONS_H_
#include <stdint.h>
#include "api.h"
#include "ascon.h"
#include "printstate.h"
#include "round.h"
#define ASCON_128_KEYBYTES 16
#define ASCON_128A_KEYBYTES 16
#define ASCON_80PQ_KEYBYTES 20
#define ASCON_128_RATE 8
#define ASCON_128A_RATE 16
#define ASCON_128_PA_ROUNDS 12
#define ASCON_128_PB_ROUNDS 6
#define ASCON_128A_PB_ROUNDS 8
#define ASCON_HASH_BYTES 32
#define ASCON_128_IV \
U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \
((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_128_PB_ROUNDS) << 32))
#define ASCON_128A_IV \
U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \
((uint64_t)(ASCON_128A_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_128A_PB_ROUNDS) << 32))
#define ASCON_80PQ_IV \
U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \
((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_128_PB_ROUNDS) << 32))
#define ASCON_HASH_IV \
U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_HASH_BYTES * 8) << 0))
#define ASCON_XOF_IV \
U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40))
#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull)
#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull)
#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull)
#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull)
#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull)
#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull)
#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull)
#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull)
#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull)
#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull)
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16
#define IV ASCON_128_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 6
#define PB P6
#endif
#if ASCON_RATE == 16
#define IV ASCON_128A_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 8
#define PB P8
#endif
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20
#define IV ASCON_80PQ_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 6
#define PB P6
#endif
#define START(n) (12 - n)
#if ASCON_UNROLL_LOOPS
__forceinline void P12ROUNDS(state_t* s) {
ROUND(s, 0xc, 0xc);
ROUND(s, 0x9, 0xc);
ROUND(s, 0xc, 0x9);
ROUND(s, 0x9, 0x9);
ROUND(s, 0x6, 0xc);
ROUND(s, 0x3, 0xc);
ROUND(s, 0x6, 0x9);
ROUND(s, 0x3, 0x9);
ROUND(s, 0xc, 0x6);
ROUND(s, 0x9, 0x6);
ROUND(s, 0xc, 0x3);
ROUND(s, 0x9, 0x3);
}
__forceinline void P8ROUNDS(state_t* s) {
ROUND(s, 0x6, 0xc);
ROUND(s, 0x3, 0xc);
ROUND(s, 0x6, 0x9);
ROUND(s, 0x3, 0x9);
ROUND(s, 0xc, 0x6);
ROUND(s, 0x9, 0x6);
ROUND(s, 0xc, 0x3);
ROUND(s, 0x9, 0x3);
}
__forceinline void P6ROUNDS(state_t* s) {
ROUND(s, 0x6, 0x9);
ROUND(s, 0x3, 0x9);
ROUND(s, 0xc, 0x6);
ROUND(s, 0x9, 0x6);
ROUND(s, 0xc, 0x3);
ROUND(s, 0x9, 0x3);
}
#else /* !ASCON_UNROLL_LOOPS */
extern const uint8_t constants[][2];
__forceinline void P12ROUNDS(state_t* s) {
for (int i = START(12); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
}
__forceinline void P8ROUNDS(state_t* s) {
for (int i = START(8); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
}
__forceinline void P6ROUNDS(state_t* s) {
for (int i = START(6); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
}
#endif
#if ASCON_INLINE_PERM
__forceinline void P12(state_t* s) {
printstate(" permutation input", s);
P12ROUNDS(s);
}
__forceinline void P8(state_t* s) {
printstate(" permutation input", s);
P8ROUNDS(s);
}
__forceinline void P6(state_t* s) {
printstate(" permutation input", s);
P6ROUNDS(s);
}
__forceinline void P(state_t* s, int i) {
if (i == 12) P12(s);
if (i == 8) P8(s);
if (i == 6) P6(s);
}
#elif ASCON_SINGLE_PERM
#define P12(s) P(s, 12)
#define P8(s) P(s, 8)
#define P6(s) P(s, 6)
void P(state_t* s, uint8_t rounds);
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */
void P12(state_t* s);
void P8(state_t* s);
void P6(state_t* s);
__forceinline void P(state_t* s, int i) {
if (i == 12) P12(s);
if (i == 8) P8(s);
if (i == 6) P6(s);
}
#endif
#endif /* PERMUTATIONS_H_ */
#ifndef PRINTSTATE_H_
#define PRINTSTATE_H_
#ifdef NDEBUG
#define printword(text, w)
#define printstate(text, s)
#else
#include <inttypes.h>
#include <stdio.h>
#include "ascon.h"
__forceinline void printword(const char* text, const word_t x) {
printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x));
}
__forceinline void printstate(const char* text, const state_t* s) {
printf("%s:\n", text);
printword(" x0", s->x0);
printword(" x1", s->x1);
printword(" x2", s->x2);
printword(" x3", s->x3);
printword(" x4", s->x4);
}
#endif
#endif /* PRINTSTATE_H_ */
#include "random.h"
#include <stdlib.h>
#include <time.h>
#include "config.h"
#if ASCON_MASK_RNG == 'R'
void randinit() { srand(time(0)); }
uint32_t rand32() {
uint32_t r;
randombytes(&r, 4);
return r;
}
uint64_t rand64() {
uint64_t r;
randombytes(&r, 8);
return r;
}
#elif ASCON_MASK_RNG == 'S'
void randinit() { srand(time(0)); }
uint32_t rand32() { return ((uint32_t)rand() << 21) ^ rand(); }
uint64_t rand64() {
return ((uint64_t)rand() << 43) ^ ((uint64_t)rand() << 21) ^ rand();
}
#elif ASCON_MASK_RNG == 'X'
uint32_t xorshift32;
uint64_t xorshift64;
void randinit() {
srand(time(0));
xorshift32 = rand();
xorshift64 = (uint64_t)rand() << 32 | rand();
}
uint32_t rand32() {
uint32_t x = xorshift32;
x ^= x << 13;
x ^= x >> 17;
x ^= x << 5;
return xorshift32 = x;
}
uint64_t rand64() {
uint64_t x = xorshift64;
x ^= x << 13;
x ^= x >> 7;
x ^= x << 17;
return xorshift64 = x;
}
#endif
#ifndef RANDOM_H_
#define RANDOM_H_
#include <stdint.h>
void randinit();
uint32_t rand32();
uint64_t rand64();
#endif /* RANDOM_H_ */
#ifndef ROUND_H_
#define ROUND_H_
#include "ascon.h"
#include "printstate.h"
#include "random.h"
__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) {
*K0 = TOSHARES(0);
*K1 = TOSHARES(0);
*K2 = TOSHARES(0);
}
__forceinline void PINIT(state_t* s) {
randinit();
s->x0 = TOSHARES(0);
s->x1 = TOSHARES(0);
s->x2 = TOSHARES(0);
s->x3 = TOSHARES(0);
s->x4 = TOSHARES(0);
s->rx = TOSHARES(0);
}
#define TOFFOLI(a0, a1, a2, b0, b1, b2, c0, c1, c2) \
do { \
(a0) ^= (~(b0)) & (c0); \
(a0) ^= (b0) & (c2); \
(a0) ^= (b2) & (c0); \
(a1) ^= (~(b1)) & (c1); \
(a1) ^= (b1) & (c0); \
(a1) ^= (b0) & (c1); \
(a2) ^= (~(b2)) & (c2); \
(a2) ^= (b2) & (c1); \
(a2) ^= (b1) & (c2); \
} while (0)
__forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) {
state_t t;
/* refresh randomness */
/* s->rx = TOSHARES(0); */
/* addition of round constant */
s->x2.s0.e ^= C_e;
/* substitution layer */
s->x0.s0.e ^= s->x4.s0.e;
s->x4.s0.e ^= s->x3.s0.e;
s->x2.s0.e ^= s->x1.s0.e;
s->x0.s1.e ^= s->x4.s1.e;
s->x4.s1.e ^= s->x3.s1.e;
s->x2.s1.e ^= s->x1.s1.e;
s->x0.s2.e ^= s->x4.s2.e;
s->x4.s2.e ^= s->x3.s2.e;
s->x2.s2.e ^= s->x1.s2.e;
/* start of shared keccak s-box from https://eprint.iacr.org/2019/536 */
s->rx.s2.e = s->rx.s0.e;
s->rx.s0.e ^= s->rx.s1.e;
TOFFOLI(s->rx.s0.e, s->rx.s1.e, s->rx.s2.e, s->x4.s0.e, s->x4.s1.e,
s->x4.s2.e, s->x0.s0.e, s->x0.s1.e, s->x0.s2.e);
TOFFOLI(s->x0.s0.e, s->x0.s1.e, s->x0.s2.e, s->x1.s0.e, s->x1.s1.e,
s->x1.s2.e, s->x2.s0.e, s->x2.s1.e, s->x2.s2.e);
TOFFOLI(s->x2.s0.e, s->x2.s1.e, s->x2.s2.e, s->x3.s0.e, s->x3.s1.e,
s->x3.s2.e, s->x4.s0.e, s->x4.s1.e, s->x4.s2.e);
TOFFOLI(s->x4.s0.e, s->x4.s1.e, s->x4.s2.e, s->x0.s0.e, s->x0.s1.e,
s->x0.s2.e, s->x1.s0.e, s->x1.s1.e, s->x1.s2.e);
TOFFOLI(s->x1.s0.e, s->x1.s1.e, s->x1.s2.e, s->x2.s0.e, s->x2.s1.e,
s->x2.s2.e, s->x3.s0.e, s->x3.s1.e, s->x3.s2.e);
s->x3.s2.e ^= s->rx.s2.e;
s->x3.s1.e ^= s->rx.s1.e;
s->x3.s0.e ^= s->rx.s0.e;
/* end of shared keccak s-box */
s->x1.s0.e ^= s->x0.s0.e;
s->x0.s0.e ^= s->x4.s0.e;
s->x3.s0.e ^= s->x2.s0.e;
s->x2.s0.e = ~s->x2.s0.e;
s->x1.s1.e ^= s->x0.s1.e;
s->x0.s1.e ^= s->x4.s1.e;
s->x3.s1.e ^= s->x2.s1.e;
s->x1.s2.e ^= s->x0.s2.e;
s->x0.s2.e ^= s->x4.s2.e;
s->x3.s2.e ^= s->x2.s2.e;
/* addition of round constant */
s->x2.s0.o ^= C_o;
/* substitution layer */
s->x0.s0.o ^= s->x4.s0.o;
s->x4.s0.o ^= s->x3.s0.o;
s->x2.s0.o ^= s->x1.s0.o;
s->x0.s1.o ^= s->x4.s1.o;
s->x4.s1.o ^= s->x3.s1.o;
s->x2.s1.o ^= s->x1.s1.o;
s->x0.s2.o ^= s->x4.s2.o;
s->x4.s2.o ^= s->x3.s2.o;
s->x2.s2.o ^= s->x1.s2.o;
/* start of shared keccak s-box from https://eprint.iacr.org/2019/536 */
s->rx.s2.o = s->rx.s0.o;
s->rx.s0.o ^= s->rx.s1.o;
TOFFOLI(s->rx.s0.o, s->rx.s1.o, s->rx.s2.o, s->x4.s0.o, s->x4.s1.o,
s->x4.s2.o, s->x0.s0.o, s->x0.s1.o, s->x0.s2.o);
TOFFOLI(s->x0.s0.o, s->x0.s1.o, s->x0.s2.o, s->x1.s0.o, s->x1.s1.o,
s->x1.s2.o, s->x2.s0.o, s->x2.s1.o, s->x2.s2.o);
TOFFOLI(s->x2.s0.o, s->x2.s1.o, s->x2.s2.o, s->x3.s0.o, s->x3.s1.o,
s->x3.s2.o, s->x4.s0.o, s->x4.s1.o, s->x4.s2.o);
TOFFOLI(s->x4.s0.o, s->x4.s1.o, s->x4.s2.o, s->x0.s0.o, s->x0.s1.o,
s->x0.s2.o, s->x1.s0.o, s->x1.s1.o, s->x1.s2.o);
TOFFOLI(s->x1.s0.o, s->x1.s1.o, s->x1.s2.o, s->x2.s0.o, s->x2.s1.o,
s->x2.s2.o, s->x3.s0.o, s->x3.s1.o, s->x3.s2.o);
s->x3.s2.o ^= s->rx.s2.o;
s->x3.s1.o ^= s->rx.s1.o;
s->x3.s0.o ^= s->rx.s0.o;
/* end of shared keccak s-box */
s->x1.s0.o ^= s->x0.s0.o;
s->x0.s0.o ^= s->x4.s0.o;
s->x3.s0.o ^= s->x2.s0.o;
s->x2.s0.o = ~s->x2.s0.o;
s->x1.s1.o ^= s->x0.s1.o;
s->x0.s1.o ^= s->x4.s1.o;
s->x3.s1.o ^= s->x2.s1.o;
s->x1.s2.o ^= s->x0.s2.o;
s->x0.s2.o ^= s->x4.s2.o;
s->x3.s2.o ^= s->x2.s2.o;
/* linear diffusion layer */
t.x0.s2.e = s->x0.s2.e ^ ROR32(s->x0.s2.o, 4);
t.x0.s2.o = s->x0.s2.o ^ ROR32(s->x0.s2.e, 5);
t.x1.s2.e = s->x1.s2.e ^ ROR32(s->x1.s2.e, 11);
t.x1.s2.o = s->x1.s2.o ^ ROR32(s->x1.s2.o, 11);
t.x2.s2.e = s->x2.s2.e ^ ROR32(s->x2.s2.o, 2);
t.x2.s2.o = s->x2.s2.o ^ ROR32(s->x2.s2.e, 3);
t.x3.s2.e = s->x3.s2.e ^ ROR32(s->x3.s2.o, 3);
t.x3.s2.o = s->x3.s2.o ^ ROR32(s->x3.s2.e, 4);
t.x4.s2.e = s->x4.s2.e ^ ROR32(s->x4.s2.e, 17);
t.x4.s2.o = s->x4.s2.o ^ ROR32(s->x4.s2.o, 17);
s->x0.s2.e ^= ROR32(t.x0.s2.o, 9);
s->x0.s2.o ^= ROR32(t.x0.s2.e, 10);
s->x1.s2.e ^= ROR32(t.x1.s2.o, 19);
s->x1.s2.o ^= ROR32(t.x1.s2.e, 20);
s->x2.s2.e ^= t.x2.s2.o;
s->x2.s2.o ^= ROR32(t.x2.s2.e, 1);
s->x3.s2.e ^= ROR32(t.x3.s2.e, 5);
s->x3.s2.o ^= ROR32(t.x3.s2.o, 5);
s->x4.s2.e ^= ROR32(t.x4.s2.o, 3);
s->x4.s2.o ^= ROR32(t.x4.s2.e, 4);
t.x0.s1.e = s->x0.s1.e ^ ROR32(s->x0.s1.o, 4);
t.x0.s1.o = s->x0.s1.o ^ ROR32(s->x0.s1.e, 5);
t.x1.s1.e = s->x1.s1.e ^ ROR32(s->x1.s1.e, 11);
t.x1.s1.o = s->x1.s1.o ^ ROR32(s->x1.s1.o, 11);
t.x2.s1.e = s->x2.s1.e ^ ROR32(s->x2.s1.o, 2);
t.x2.s1.o = s->x2.s1.o ^ ROR32(s->x2.s1.e, 3);
t.x3.s1.e = s->x3.s1.e ^ ROR32(s->x3.s1.o, 3);
t.x3.s1.o = s->x3.s1.o ^ ROR32(s->x3.s1.e, 4);
t.x4.s1.e = s->x4.s1.e ^ ROR32(s->x4.s1.e, 17);
t.x4.s1.o = s->x4.s1.o ^ ROR32(s->x4.s1.o, 17);
s->x0.s1.e ^= ROR32(t.x0.s1.o, 9);
s->x0.s1.o ^= ROR32(t.x0.s1.e, 10);
s->x1.s1.e ^= ROR32(t.x1.s1.o, 19);
s->x1.s1.o ^= ROR32(t.x1.s1.e, 20);
s->x2.s1.e ^= t.x2.s1.o;
s->x2.s1.o ^= ROR32(t.x2.s1.e, 1);
s->x3.s1.e ^= ROR32(t.x3.s1.e, 5);
s->x3.s1.o ^= ROR32(t.x3.s1.o, 5);
s->x4.s1.e ^= ROR32(t.x4.s1.o, 3);
s->x4.s1.o ^= ROR32(t.x4.s1.e, 4);
t.x0.s0.e = s->x0.s0.e ^ ROR32(s->x0.s0.o, 4);
t.x0.s0.o = s->x0.s0.o ^ ROR32(s->x0.s0.e, 5);
t.x1.s0.e = s->x1.s0.e ^ ROR32(s->x1.s0.e, 11);
t.x1.s0.o = s->x1.s0.o ^ ROR32(s->x1.s0.o, 11);
t.x2.s0.e = s->x2.s0.e ^ ROR32(s->x2.s0.o, 2);
t.x2.s0.o = s->x2.s0.o ^ ROR32(s->x2.s0.e, 3);
t.x3.s0.e = s->x3.s0.e ^ ROR32(s->x3.s0.o, 3);
t.x3.s0.o = s->x3.s0.o ^ ROR32(s->x3.s0.e, 4);
t.x4.s0.e = s->x4.s0.e ^ ROR32(s->x4.s0.e, 17);
t.x4.s0.o = s->x4.s0.o ^ ROR32(s->x4.s0.o, 17);
s->x0.s0.e ^= ROR32(t.x0.s0.o, 9);
s->x0.s0.o ^= ROR32(t.x0.s0.e, 10);
s->x1.s0.e ^= ROR32(t.x1.s0.o, 19);
s->x1.s0.o ^= ROR32(t.x1.s0.e, 20);
s->x2.s0.e ^= t.x2.s0.o;
s->x2.s0.o ^= ROR32(t.x2.s0.e, 1);
s->x3.s0.e ^= ROR32(t.x3.s0.e, 5);
s->x3.s0.o ^= ROR32(t.x3.s0.o, 5);
s->x4.s0.e ^= ROR32(t.x4.s0.o, 3);
s->x4.s0.o ^= ROR32(t.x4.s0.e, 4);
printstate(" round output", s);
}
#endif /* ROUND_H_ */
#include "word.h"
/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
share_t TOBI32(share_t in) {
uint32_t r0, r1;
uint32_t lo = in.e;
uint32_t hi = in.o;
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1);
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2);
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4);
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8);
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1);
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2);
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4);
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8);
r0 = (lo & 0x0000FFFF) | (hi << 16);
r1 = (lo >> 16) | (hi & 0xFFFF0000);
in.e = r0;
in.o = r1;
return in;
}
/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
share_t FROMBI32(share_t in) {
uint32_t r0 = in.e;
uint32_t r1 = in.o;
uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16);
uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000);
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8);
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4);
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2);
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1);
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8);
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4);
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2);
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1);
in.e = lo;
in.o = hi;
return in;
}
#ifndef WORD_H_
#define WORD_H_
#include <stdint.h>
#include "config.h"
#include "random.h"
typedef struct {
uint32_t e;
uint32_t o;
} share_t;
typedef struct {
share_t s0;
share_t s1;
share_t s2;
} word_t;
__forceinline word_t WORD_T(uint64_t x) {
word_t w;
w.s0.e = (uint32_t)x;
w.s0.o = x >> 32;
w.s1.e = 0;
w.s1.o = 0;
w.s2.e = 0;
w.s2.o = 0;
return w;
}
__forceinline uint64_t UINT64_T(word_t w) {
return (uint64_t)w.s0.o << 32 | w.s0.e;
}
share_t TOBI32(share_t in);
share_t FROMBI32(share_t in);
__forceinline word_t TOSHARES(uint64_t in) {
uint32_t r0 = rand32();
uint32_t r1 = rand32();
uint32_t r2 = rand32();
uint32_t r3 = rand32();
word_t w;
w.s0.e = (uint32_t)in ^ r0 ^ r2;
w.s0.o = (in >> 32) ^ r1 ^ r3;
w.s1.e = r0;
w.s1.o = r1;
w.s2.e = r2;
w.s2.o = r3;
return w;
}
__forceinline uint64_t FROMSHARES(word_t in) {
return (uint64_t)(in.s0.o ^ in.s1.o ^ in.s2.o) << 32 |
(in.s0.e ^ in.s1.e ^ in.s2.e);
}
__forceinline word_t U64TOWORD(uint64_t x) {
#if ASCON_MASK_LOADS
word_t w = TOSHARES(x);
w.s0 = TOBI32(w.s0);
w.s1 = TOBI32(w.s1);
w.s2 = TOBI32(w.s2);
return w;
#else
word_t w = WORD_T(x);
w.s0 = TOBI32(w.s0);
return w;
#endif
}
__forceinline uint64_t WORDTOU64(word_t w) {
w.s0 = FROMBI32(w.s0);
w.s1 = FROMBI32(w.s1);
w.s2 = FROMBI32(w.s2);
return (uint64_t)FROMSHARES(w);
}
#define XOR(a, b) \
do { \
word_t tb = b; \
(a).s0.e ^= tb.s0.e; \
(a).s0.o ^= tb.s0.o; \
(a).s1.e ^= tb.s1.e; \
(a).s1.o ^= tb.s1.o; \
(a).s2.e ^= tb.s2.e; \
(a).s2.o ^= tb.s2.o; \
} while (0)
#define AND(a, b) \
do { \
word_t ta = a; \
word_t tb = b; \
(a).s0.e = \
(ta.s0.e & tb.s0.e) ^ (ta.s0.e & tb.s1.e) ^ (ta.s0.e & tb.s2.e); \
(a).s0.o = \
(ta.s0.o & tb.s0.o) ^ (ta.s0.o & tb.s1.o) ^ (ta.s0.o & tb.s2.o); \
(a).s1.e = \
(ta.s1.e & tb.s0.e) ^ (ta.s1.e & tb.s1.e) ^ (ta.s1.e & tb.s2.e); \
(a).s1.o = \
(ta.s1.o & tb.s0.o) ^ (ta.s1.o & tb.s1.o) ^ (ta.s1.o & tb.s2.o); \
(a).s2.e = \
(ta.s2.e & tb.s0.e) ^ (ta.s2.e & tb.s1.e) ^ (ta.s2.e & tb.s2.e); \
(a).s2.o = \
(ta.s2.o & tb.s0.o) ^ (ta.s2.o & tb.s1.o) ^ (ta.s2.o & tb.s2.o); \
} while (0)
__forceinline uint32_t ROR32(uint32_t x, int n) {
return x >> n | x << (32 - n);
}
__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) {
word_t r;
r.s0.e = lo2hi.s0.e << 16 | hi2lo.s0.e >> 16;
r.s0.o = lo2hi.s0.o << 16 | hi2lo.s0.o >> 16;
r.s1.e = lo2hi.s1.e << 16 | hi2lo.s1.e >> 16;
r.s1.o = lo2hi.s1.o << 16 | hi2lo.s1.o >> 16;
r.s2.e = lo2hi.s2.e << 16 | hi2lo.s2.e >> 16;
r.s2.o = lo2hi.s2.o << 16 | hi2lo.s2.o >> 16;
return r;
}
__forceinline int NOTZERO(word_t a, word_t b) {
int result = 0;
for (int i = 0; i < 8; ++i)
result |= ((uint8_t*)&(a.s0))[i] ^ ((uint8_t*)&(a.s1))[i] ^
((uint8_t*)&(a.s2))[i];
for (int i = 0; i < 8; ++i)
result |= ((uint8_t*)&(b.s0))[i] ^ ((uint8_t*)&(b.s1))[i] ^
((uint8_t*)&(b.s2))[i];
return result;
}
/* set padding byte in 64-bit Ascon word */
__forceinline word_t PAD(int i) {
return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32);
}
/* byte mask for 64-bit Ascon word (1 <= n <= 8) */
__forceinline word_t XMASK(int n) {
uint32_t mask = 0x0fffffff >> (n * 4 - 4);
return WORD_T((uint64_t)mask << 32 | mask);
}
#endif /* WORD_H_ */
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#define ASCON_RATE 16
#include "ascon.h"
#include "api.h"
#include "loadstore.h"
#include "permutations.h"
#include "printstate.h"
__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2,
const uint8_t* k) {
KINIT(K0, K1, K2);
if (CRYPTO_KEYBYTES == 20) {
XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4)));
k += 4;
}
XOR(*K1, LOAD64(k));
XOR(*K2, LOAD64(k + 8));
}
__forceinline void init(state_t* s, const uint8_t* npub, const uint8_t* k) {
word_t N0, N1;
word_t K0, K1, K2;
/* load nonce */
N0 = LOAD64(npub);
N1 = LOAD64(npub + 8);
/* load key */
loadkey(&K0, &K1, &K2, k);
/* initialization */
PINIT(s);
XOR(s->x0, IV);
if (CRYPTO_KEYBYTES == 20) XOR(s->x0, K0);
XOR(s->x1, K1);
XOR(s->x2, K2);
XOR(s->x3, N0);
XOR(s->x4, N1);
P12(s);
if (CRYPTO_KEYBYTES == 20) XOR(s->x2, K0);
XOR(s->x3, K1);
XOR(s->x4, K2);
printstate("initialization", s);
}
__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) {
word_t* restrict px;
/* process associated data */
if (adlen) {
while (adlen >= ASCON_RATE) {
XOR(s->x0, LOAD64(ad));
if (ASCON_RATE == 16) XOR(s->x1, LOAD64(ad + 8));
PB(s);
ad += ASCON_RATE;
adlen -= ASCON_RATE;
}
/* final associated data block */
px = &s->x0;
if (ASCON_RATE == 16 && adlen >= 8) {
XOR(s->x0, LOAD64(ad));
px = &s->x1;
ad += 8;
adlen -= 8;
}
if (adlen) XOR(*px, LOAD(ad, adlen));
XOR(*px, PAD(adlen));
PB(s);
}
XOR(s->x4, WORD_T(1));
printstate("process associated data", s);
}
__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m,
uint64_t mlen) {
word_t* restrict px;
/* process plaintext */
while (mlen >= ASCON_RATE) {
XOR(s->x0, LOAD64(m));
STORE64(c, s->x0);
if (ASCON_RATE == 16) {
XOR(s->x1, LOAD64(m + 8));
STORE64(c + 8, s->x1);
}
PB(s);
m += ASCON_RATE;
c += ASCON_RATE;
mlen -= ASCON_RATE;
}
/* final plaintext block */
px = &s->x0;
if (ASCON_RATE == 16 && mlen >= 8) {
XOR(s->x0, LOAD64(m));
STORE64(c, s->x0);
px = &s->x1;
m += 8;
c += 8;
mlen -= 8;
}
if (mlen) {
XOR(*px, LOAD(m, mlen));
STORE(c, *px, mlen);
}
XOR(*px, PAD(mlen));
printstate("process plaintext", s);
}
__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c,
uint64_t clen) {
word_t* restrict px;
word_t cx;
/* process ciphertext */
while (clen >= ASCON_RATE) {
cx = LOAD64(c);
XOR(s->x0, cx);
STORE64(m, s->x0);
s->x0 = cx;
if (ASCON_RATE == 16) {
cx = LOAD64(c + 8);
XOR(s->x1, cx);
STORE64(m + 8, s->x1);
s->x1 = cx;
}
PB(s);
m += ASCON_RATE;
c += ASCON_RATE;
clen -= ASCON_RATE;
}
/* final ciphertext block */
px = &s->x0;
if (ASCON_RATE == 16 && clen >= 8) {
cx = LOAD64(c);
XOR(s->x0, cx);
STORE64(m, s->x0);
s->x0 = cx;
px = &s->x1;
m += 8;
c += 8;
clen -= 8;
}
if (clen) {
cx = LOAD(c, clen);
XOR(*px, cx);
STORE(m, *px, clen);
AND(*px, XMASK(clen));
XOR(*px, cx);
}
XOR(*px, PAD(clen));
printstate("process ciphertext", s);
}
__forceinline void final(state_t* s, const uint8_t* k) {
word_t K0, K1, K2;
/* load key */
loadkey(&K0, &K1, &K2, k);
/* finalization */
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) {
XOR(s->x1, K1);
XOR(s->x2, K2);
}
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) {
XOR(s->x2, K1);
XOR(s->x3, K2);
}
if (CRYPTO_KEYBYTES == 20) {
XOR(s->x1, KEYROT(K0, K1));
XOR(s->x2, KEYROT(K1, K2));
XOR(s->x3, KEYROT(K2, WORD_T(0)));
}
P12(s);
XOR(s->x3, K1);
XOR(s->x4, K2);
printstate("finalization", s);
}
#if !ASCON_INLINE_MODE
#define INIT ascon_init
#define ABSORB ascon_absorb
#define ENCRYPT ascon_encrypt
#define DECRYPT ascon_decrypt
#define FINAL ascon_final
void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) {
init(s, npub, k);
}
void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) {
absorb(s, ad, adlen);
}
void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) {
encrypt(s, c, m, mlen);
}
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) {
decrypt(s, m, c, clen);
}
void ascon_final(state_t* s, const uint8_t* k) { final(s, k); }
#else
#define INIT init
#define ABSORB absorb
#define ENCRYPT encrypt
#define DECRYPT decrypt
#define FINAL final
#endif
int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m,
uint64_t mlen, const uint8_t* ad, uint64_t adlen,
const uint8_t* nsec, const uint8_t* npub,
const uint8_t* k) {
state_t s;
(void)nsec;
*clen = mlen + CRYPTO_ABYTES;
/* perform ascon computation */
INIT(&s, npub, k);
ABSORB(&s, ad, adlen);
ENCRYPT(&s, c, m, mlen);
FINAL(&s, k);
/* set tag */
c += mlen;
STORE64(c, s.x3);
STORE64(c + 8, s.x4);
return 0;
}
int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec,
const uint8_t* c, uint64_t clen, const uint8_t* ad,
uint64_t adlen, const uint8_t* npub, const uint8_t* k) {
state_t s;
(void)nsec;
if (clen < CRYPTO_ABYTES) {
*mlen = 0;
return -1;
}
*mlen = clen = clen - CRYPTO_ABYTES;
/* perform ascon computation */
INIT(&s, npub, k);
ABSORB(&s, ad, adlen);
DECRYPT(&s, m, c, clen);
FINAL(&s, k);
/* verify tag (should be constant time, check compiler output) */
c += clen;
XOR(s.x3, LOAD64(c));
XOR(s.x4, LOAD64(c + 8));
if (NOTZERO(s.x3, s.x4)) {
*mlen = 0;
return -1;
}
return 0;
}
#ifndef ASCON_H_
#define ASCON_H_
#include <stdint.h>
#include "config.h"
#include "word.h"
typedef struct {
word_t x0, x1, x2, x3, x4;
word_t rx;
} state_t;
void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k);
void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen);
void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen);
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen);
void ascon_final(state_t* s, const uint8_t* k);
#endif // ASCON_H_
#ifndef CONFIG_H_
#define CONFIG_H_
/* inline the Ascon mode */
#ifndef ASCON_INLINE_MODE
#define ASCON_INLINE_MODE 1
#endif
/* inline the Ascon permutations */
#ifndef ASCON_INLINE_PERM
#define ASCON_INLINE_PERM 1
#endif
/* single function for all permutations */
#ifndef ASCON_SINGLE_PERM
#define ASCON_SINGLE_PERM 0
#endif
/* unroll the permutation loops */
#ifndef ASCON_UNROLL_LOOPS
#define ASCON_UNROLL_LOOPS 1
#endif
/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */
#ifndef ASCON_DATA_ACCESS
#define ASCON_DATA_ACCESS 'H'
#endif
/* Ascon mask generator { 'S'tdlib, 'X'orshift, 'R'andombytes } */
#ifndef ASCON_MASK_RNG
#define ASCON_MASK_RNG 'X'
#endif
/* mask key/data loads */
#ifndef ASCON_MASK_LOADS
#define ASCON_MASK_LOADS 0
#endif
/* make sure __forceinline is supported */
#ifndef __forceinline
#define __forceinline inline __attribute__((always_inline))
#endif
#endif /* CONFIG_H_ */
#ifndef ENDIAN_H_
#define ENDIAN_H_
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
/* macros for big endian machines */
#ifndef NDEBUG
#pragma message("Using macros for big endian machines")
#endif
#define U64BIG(x) (x)
#define U32BIG(x) (x)
#define U16BIG(x) (x)
#elif defined(_MSC_VER) || \
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
/* macros for little endian machines */
#ifndef NDEBUG
#pragma message("Using macros for little endian machines")
#endif
#define U64BIG(x) \
(((0x00000000000000FFULL & (x)) << 56) | \
((0x000000000000FF00ULL & (x)) << 40) | \
((0x0000000000FF0000ULL & (x)) << 24) | \
((0x00000000FF000000ULL & (x)) << 8) | \
((0x000000FF00000000ULL & (x)) >> 8) | \
((0x0000FF0000000000ULL & (x)) >> 24) | \
((0x00FF000000000000ULL & (x)) >> 40) | \
((0xFF00000000000000ULL & (x)) >> 56))
#define U32BIG(x) \
(((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \
((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24))
#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8))
#else
#error "Ascon byte order macros not defined in endian.h"
#endif
#endif /* ENDIAN_H_ */
#ifndef LOADSTORE_H_
#define LOADSTORE_H_
#include <stdint.h>
#include "config.h"
#include "endian.h"
#include "word.h"
/* 64-bit LSB mask (undefined for n == 0) */
#define MASK(n) (~0ull >> (64 - (n)))
/* get byte from Ascon 64-bit word */
#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i))))
/* set byte in Ascon 64-bit word */
#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i)))
#if ASCON_DATA_ACCESS == 'W'
#ifndef NDEBUG
#pragma message("Using wordwise data access")
#endif
__forceinline word_t LOAD64(const uint8_t* bytes) {
uint64_t x = U64BIG(*(uint64_t*)bytes);
return U64TOWORD(x);
}
__forceinline void STORE64(uint8_t* bytes, word_t w) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes = U64BIG(x);
}
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = U64BIG(*(uint64_t*)bytes & MASK(8 * n));
return U64TOWORD(x);
}
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes &= ~MASK(8 * n);
*(uint64_t*)bytes |= U64BIG(x);
}
#elif ASCON_DATA_ACCESS == 'M'
#ifndef NDEBUG
#pragma message("Using memcpy to access data")
#endif
#include <string.h>
#define LOAD64(bytes) LOAD(bytes, 8)
#define STORE64(bytes, w) STORE(bytes, w, 8)
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = 0;
memcpy((uint8_t*)&x, bytes, n);
return U64TOWORD(U64BIG(x));
}
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = U64BIG(WORDTOU64(w));
memcpy(bytes, (uint8_t*)&x, n);
}
#elif ASCON_DATA_ACCESS == 'B'
#ifndef NDEBUG
#pragma message("Using bytewise data access")
#endif
#define LOAD64(bytes) LOAD(bytes, 8)
#define STORE64(bytes, w) STORE(bytes, w, 8)
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = 0;
for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i;
return U64TOWORD(x);
}
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i;
}
#elif ASCON_DATA_ACCESS == 'H'
#ifndef NDEBUG
#pragma message("Using hybrid data access")
#endif
#define LOAD64(bytes) LOAD(bytes, 8)
#define STORE64(bytes, w) STORE(bytes, w, 8)
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = 0;
if (n == 8)
x = U64BIG(*(uint64_t*)bytes);
else
for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i;
return U64TOWORD(x);
}
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
if (n == 8)
*(uint64_t*)bytes = U64BIG(x);
else
for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i;
}
#else
#error "Ascon data access macro not defined correctly"
#endif
#endif /* LOADSTORE_H_ */
#include "permutations.h"
#include "round.h"
#if ASCON_INLINE_PERM
#elif ASCON_SINGLE_PERM
void P(state_t* s, uint8_t rounds) {
printstate(" permutation input", s);
for (int i = START(rounds); i > 0x4a; i -= 0x0f) ROUND(s, i);
}
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */
void P12(state_t* s) { P12ROUNDS(s); }
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16
void P8(state_t* s) { P8ROUNDS(s); }
#endif
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8
void P6(state_t* s) { P6ROUNDS(s); }
#endif
#endif
#ifndef PERMUTATIONS_H_
#define PERMUTATIONS_H_
#include <stdint.h>
#include "api.h"
#include "ascon.h"
#include "printstate.h"
#include "round.h"
#define ASCON_128_KEYBYTES 16
#define ASCON_128A_KEYBYTES 16
#define ASCON_80PQ_KEYBYTES 20
#define ASCON_128_RATE 8
#define ASCON_128A_RATE 16
#define ASCON_128_PA_ROUNDS 12
#define ASCON_128_PB_ROUNDS 6
#define ASCON_128A_PB_ROUNDS 8
#define ASCON_HASH_BYTES 32
#define ASCON_128_IV \
U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \
((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_128_PB_ROUNDS) << 32))
#define ASCON_128A_IV \
U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \
((uint64_t)(ASCON_128A_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_128A_PB_ROUNDS) << 32))
#define ASCON_80PQ_IV \
U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \
((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_128_PB_ROUNDS) << 32))
#define ASCON_HASH_IV \
U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_HASH_BYTES * 8) << 0))
#define ASCON_XOF_IV \
U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40))
#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull)
#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull)
#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull)
#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull)
#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull)
#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull)
#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull)
#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull)
#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull)
#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull)
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16
#define IV ASCON_128_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 6
#define PB P6
#endif
#if ASCON_RATE == 16
#define IV ASCON_128A_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 8
#define PB P8
#endif
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20
#define IV ASCON_80PQ_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 6
#define PB P6
#endif
#define START(n) ((3 + (n)) << 4 | (12 - (n)))
#if ASCON_UNROLL_LOOPS
__forceinline void P12ROUNDS(state_t* s) {
ROUND(s, 0xf0);
ROUND(s, 0xe1);
ROUND(s, 0xd2);
ROUND(s, 0xc3);
ROUND(s, 0xb4);
ROUND(s, 0xa5);
ROUND(s, 0x96);
ROUND(s, 0x87);
ROUND(s, 0x78);
ROUND(s, 0x69);
ROUND(s, 0x5a);
ROUND(s, 0x4b);
}
__forceinline void P8ROUNDS(state_t* s) {
ROUND(s, 0xb4);
ROUND(s, 0xa5);
ROUND(s, 0x96);
ROUND(s, 0x87);
ROUND(s, 0x78);
ROUND(s, 0x69);
ROUND(s, 0x5a);
ROUND(s, 0x4b);
}
__forceinline void P6ROUNDS(state_t* s) {
ROUND(s, 0x96);
ROUND(s, 0x87);
ROUND(s, 0x78);
ROUND(s, 0x69);
ROUND(s, 0x5a);
ROUND(s, 0x4b);
}
#else /* !ASCON_UNROLL_LOOPS */
__forceinline void P12ROUNDS(state_t* s) {
for (int i = START(12); i > 0x4a; i -= 0x0f) ROUND(s, i);
}
__forceinline void P8ROUNDS(state_t* s) {
for (int i = START(8); i > 0x4a; i -= 0x0f) ROUND(s, i);
}
__forceinline void P6ROUNDS(state_t* s) {
for (int i = START(6); i > 0x4a; i -= 0x0f) ROUND(s, i);
}
#endif
#if ASCON_INLINE_PERM
__forceinline void P12(state_t* s) {
printstate(" permutation input", s);
P12ROUNDS(s);
}
__forceinline void P8(state_t* s) {
printstate(" permutation input", s);
P8ROUNDS(s);
}
__forceinline void P6(state_t* s) {
printstate(" permutation input", s);
P6ROUNDS(s);
}
__forceinline void P(state_t* s, int i) {
if (i == 12) P12(s);
if (i == 8) P8(s);
if (i == 6) P6(s);
}
#elif ASCON_SINGLE_PERM
#define P12(s) P(s, 12)
#define P8(s) P(s, 8)
#define P6(s) P(s, 6)
void P(state_t* s, uint8_t rounds);
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */
void P12(state_t* s);
void P8(state_t* s);
void P6(state_t* s);
__forceinline void P(state_t* s, int i) {
if (i == 12) P12(s);
if (i == 8) P8(s);
if (i == 6) P6(s);
}
#endif
#endif /* PERMUTATIONS_H_ */
#ifndef PRINTSTATE_H_
#define PRINTSTATE_H_
#ifdef NDEBUG
#define printword(text, w)
#define printstate(text, s)
#else
#include <inttypes.h>
#include <stdio.h>
#include "ascon.h"
__forceinline void printword(const char* text, const word_t x) {
printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x));
}
__forceinline void printstate(const char* text, const state_t* s) {
printf("%s:\n", text);
printword(" x0", s->x0);
printword(" x1", s->x1);
printword(" x2", s->x2);
printword(" x3", s->x3);
printword(" x4", s->x4);
}
#endif
#endif /* PRINTSTATE_H_ */
#include "random.h"
#include <stdlib.h>
#include <time.h>
#include "config.h"
#if ASCON_MASK_RNG == 'R'
void randinit() { srand(time(0)); }
uint32_t rand32() {
uint32_t r;
randombytes(&r, 4);
return r;
}
uint64_t rand64() {
uint64_t r;
randombytes(&r, 8);
return r;
}
#elif ASCON_MASK_RNG == 'S'
void randinit() { srand(time(0)); }
uint32_t rand32() { return ((uint32_t)rand() << 21) ^ rand(); }
uint64_t rand64() {
return ((uint64_t)rand() << 43) ^ ((uint64_t)rand() << 21) ^ rand();
}
#elif ASCON_MASK_RNG == 'X'
uint32_t xorshift32;
uint64_t xorshift64;
void randinit() {
srand(time(0));
xorshift32 = rand();
xorshift64 = (uint64_t)rand() << 32 | rand();
}
uint32_t rand32() {
uint32_t x = xorshift32;
x ^= x << 13;
x ^= x >> 17;
x ^= x << 5;
return xorshift32 = x;
}
uint64_t rand64() {
uint64_t x = xorshift64;
x ^= x << 13;
x ^= x >> 7;
x ^= x << 17;
return xorshift64 = x;
}
#endif
#ifndef RANDOM_H_
#define RANDOM_H_
#include <stdint.h>
void randinit();
uint32_t rand32();
uint64_t rand64();
#endif /* RANDOM_H_ */
#ifndef ROUND_H_
#define ROUND_H_
#include "ascon.h"
#include "printstate.h"
#include "random.h"
__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) {
*K0 = TOSHARES(0);
*K1 = TOSHARES(0);
*K2 = TOSHARES(0);
}
__forceinline void PINIT(state_t* s) {
randinit();
s->x0 = TOSHARES(0);
s->x1 = TOSHARES(0);
s->x2 = TOSHARES(0);
s->x3 = TOSHARES(0);
s->x4 = TOSHARES(0);
s->rx = TOSHARES(0);
}
#define TOFFOLI(a0, a1, b0, b1, c0, c1) \
do { \
(a0) ^= (~(b0)) & (c1); \
(a0) ^= (~(b0)) & (c0); \
(a1) ^= (b1) & (c1); \
(a1) ^= (b1) & (c0); \
} while (0)
__forceinline void ROUND(state_t* s, uint64_t C) {
/* refresh randomness */
/* s->rx = TOSHARES(0); */
/* addition of round constant */
s->x2.s0 ^= C;
/* substitution layer */
s->x0.s0 ^= s->x4.s0;
s->x4.s0 ^= s->x3.s0;
s->x2.s0 ^= s->x1.s0;
s->x0.s1 ^= s->x4.s1;
s->x4.s1 ^= s->x3.s1;
s->x2.s1 ^= s->x1.s1;
/* start of shared keccak s-box from https://eprint.iacr.org/2019/536 */
s->rx.s0 = s->rx.s1;
TOFFOLI(s->rx.s0, s->rx.s1, s->x4.s0, s->x4.s1, s->x0.s0, s->x0.s1);
TOFFOLI(s->x0.s0, s->x0.s1, s->x1.s0, s->x1.s1, s->x2.s0, s->x2.s1);
TOFFOLI(s->x2.s0, s->x2.s1, s->x3.s0, s->x3.s1, s->x4.s0, s->x4.s1);
TOFFOLI(s->x4.s0, s->x4.s1, s->x0.s0, s->x0.s1, s->x1.s0, s->x1.s1);
TOFFOLI(s->x1.s0, s->x1.s1, s->x2.s0, s->x2.s1, s->x3.s0, s->x3.s1);
s->x3.s1 ^= s->rx.s1;
s->x3.s0 ^= s->rx.s0;
/* end of shared keccak s-box */
s->x1.s0 ^= s->x0.s0;
s->x0.s0 ^= s->x4.s0;
s->x3.s0 ^= s->x2.s0;
s->x2.s0 = ~s->x2.s0;
s->x1.s1 ^= s->x0.s1;
s->x0.s1 ^= s->x4.s1;
s->x3.s1 ^= s->x2.s1;
/* linear diffusion layer */
s->x0.s1 ^= ROR64(s->x0.s1, 19) ^ ROR64(s->x0.s1, 28);
s->x1.s1 ^= ROR64(s->x1.s1, 61) ^ ROR64(s->x1.s1, 39);
s->x2.s1 ^= ROR64(s->x2.s1, 1) ^ ROR64(s->x2.s1, 6);
s->x3.s1 ^= ROR64(s->x3.s1, 10) ^ ROR64(s->x3.s1, 17);
s->x4.s1 ^= ROR64(s->x4.s1, 7) ^ ROR64(s->x4.s1, 41);
s->x0.s0 ^= ROR64(s->x0.s0, 19) ^ ROR64(s->x0.s0, 28);
s->x1.s0 ^= ROR64(s->x1.s0, 61) ^ ROR64(s->x1.s0, 39);
s->x2.s0 ^= ROR64(s->x2.s0, 1) ^ ROR64(s->x2.s0, 6);
s->x3.s0 ^= ROR64(s->x3.s0, 10) ^ ROR64(s->x3.s0, 17);
s->x4.s0 ^= ROR64(s->x4.s0, 7) ^ ROR64(s->x4.s0, 41);
printstate(" round output", s);
}
#endif /* ROUND_H_ */
#ifndef WORD_H_
#define WORD_H_
#include <stdint.h>
#include "config.h"
#include "random.h"
typedef uint64_t share_t;
typedef struct {
share_t s0;
share_t s1;
} word_t;
__forceinline word_t WORD_T(uint64_t x) { return (word_t){x, 0}; }
__forceinline uint64_t UINT64_T(word_t w) { return w.s0; }
__forceinline word_t TOSHARES(share_t in) {
uint64_t r0 = rand64();
return (word_t){in ^ r0, r0};
}
__forceinline share_t FROMSHARES(word_t in) { return in.s0 ^ in.s1; }
__forceinline word_t U64TOWORD(uint64_t x) {
#if ASCON_MASK_LOADS
return TOSHARES(x);
#else
return WORD_T(x);
#endif
}
__forceinline uint64_t WORDTOU64(word_t w) { return FROMSHARES(w); }
#define XOR(a, b) \
do { \
word_t t = b; \
(a).s0 ^= t.s0; \
(a).s1 ^= t.s1; \
} while (0)
#define AND(a, b) \
do { \
word_t ta = a; \
word_t tb = b; \
(a).s0 = (ta.s0 & tb.s0) ^ (ta.s0 & tb.s1); \
(a).s1 = (ta.s1 & tb.s0) ^ (ta.s1 & tb.s1); \
} while (0)
__forceinline uint64_t ROR64(uint64_t x, int n) {
return x >> n | x << (64 - n);
}
__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) {
word_t r;
r.s0 = lo2hi.s0 << 32 | hi2lo.s0 >> 32;
r.s1 = lo2hi.s1 << 32 | hi2lo.s1 >> 32;
return r;
}
__forceinline int NOTZERO(word_t a, word_t b) {
int result = 0;
for (int i = 0; i < 8; ++i)
result |= ((uint8_t*)&(a.s0))[i] ^ ((uint8_t*)&(a.s1))[i];
for (int i = 0; i < 8; ++i)
result |= ((uint8_t*)&(b.s0))[i] ^ ((uint8_t*)&(b.s1))[i];
return result;
}
/* set padding byte in 64-bit Ascon word */
__forceinline word_t PAD(int i) { return WORD_T(0x80ull << (56 - 8 * i)); }
/* byte mask for 64-bit Ascon word (1 <= n <= 8) */
__forceinline word_t XMASK(int n) {
return WORD_T(0x00ffffffffffffffull >> (n * 8 - 8));
}
#endif /* WORD_H_ */
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#define ASCON_RATE 16
#include "ascon.h"
#include "api.h"
#include "loadstore.h"
#include "permutations.h"
#include "printstate.h"
__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2,
const uint8_t* k) {
KINIT(K0, K1, K2);
if (CRYPTO_KEYBYTES == 20) {
XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4)));
k += 4;
}
XOR(*K1, LOAD64(k));
XOR(*K2, LOAD64(k + 8));
}
__forceinline void init(state_t* s, const uint8_t* npub, const uint8_t* k) {
word_t N0, N1;
word_t K0, K1, K2;
/* load nonce */
N0 = LOAD64(npub);
N1 = LOAD64(npub + 8);
/* load key */
loadkey(&K0, &K1, &K2, k);
/* initialization */
PINIT(s);
XOR(s->x0, IV);
if (CRYPTO_KEYBYTES == 20) XOR(s->x0, K0);
XOR(s->x1, K1);
XOR(s->x2, K2);
XOR(s->x3, N0);
XOR(s->x4, N1);
P12(s);
if (CRYPTO_KEYBYTES == 20) XOR(s->x2, K0);
XOR(s->x3, K1);
XOR(s->x4, K2);
printstate("initialization", s);
}
__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) {
word_t* restrict px;
/* process associated data */
if (adlen) {
while (adlen >= ASCON_RATE) {
XOR(s->x0, LOAD64(ad));
if (ASCON_RATE == 16) XOR(s->x1, LOAD64(ad + 8));
PB(s);
ad += ASCON_RATE;
adlen -= ASCON_RATE;
}
/* final associated data block */
px = &s->x0;
if (ASCON_RATE == 16 && adlen >= 8) {
XOR(s->x0, LOAD64(ad));
px = &s->x1;
ad += 8;
adlen -= 8;
}
if (adlen) XOR(*px, LOAD(ad, adlen));
XOR(*px, PAD(adlen));
PB(s);
}
XOR(s->x4, WORD_T(1));
printstate("process associated data", s);
}
__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m,
uint64_t mlen) {
word_t* restrict px;
/* process plaintext */
while (mlen >= ASCON_RATE) {
XOR(s->x0, LOAD64(m));
STORE64(c, s->x0);
if (ASCON_RATE == 16) {
XOR(s->x1, LOAD64(m + 8));
STORE64(c + 8, s->x1);
}
PB(s);
m += ASCON_RATE;
c += ASCON_RATE;
mlen -= ASCON_RATE;
}
/* final plaintext block */
px = &s->x0;
if (ASCON_RATE == 16 && mlen >= 8) {
XOR(s->x0, LOAD64(m));
STORE64(c, s->x0);
px = &s->x1;
m += 8;
c += 8;
mlen -= 8;
}
if (mlen) {
XOR(*px, LOAD(m, mlen));
STORE(c, *px, mlen);
}
XOR(*px, PAD(mlen));
printstate("process plaintext", s);
}
__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c,
uint64_t clen) {
word_t* restrict px;
word_t cx;
/* process ciphertext */
while (clen >= ASCON_RATE) {
cx = LOAD64(c);
XOR(s->x0, cx);
STORE64(m, s->x0);
s->x0 = cx;
if (ASCON_RATE == 16) {
cx = LOAD64(c + 8);
XOR(s->x1, cx);
STORE64(m + 8, s->x1);
s->x1 = cx;
}
PB(s);
m += ASCON_RATE;
c += ASCON_RATE;
clen -= ASCON_RATE;
}
/* final ciphertext block */
px = &s->x0;
if (ASCON_RATE == 16 && clen >= 8) {
cx = LOAD64(c);
XOR(s->x0, cx);
STORE64(m, s->x0);
s->x0 = cx;
px = &s->x1;
m += 8;
c += 8;
clen -= 8;
}
if (clen) {
cx = LOAD(c, clen);
XOR(*px, cx);
STORE(m, *px, clen);
AND(*px, XMASK(clen));
XOR(*px, cx);
}
XOR(*px, PAD(clen));
printstate("process ciphertext", s);
}
__forceinline void final(state_t* s, const uint8_t* k) {
word_t K0, K1, K2;
/* load key */
loadkey(&K0, &K1, &K2, k);
/* finalization */
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) {
XOR(s->x1, K1);
XOR(s->x2, K2);
}
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) {
XOR(s->x2, K1);
XOR(s->x3, K2);
}
if (CRYPTO_KEYBYTES == 20) {
XOR(s->x1, KEYROT(K0, K1));
XOR(s->x2, KEYROT(K1, K2));
XOR(s->x3, KEYROT(K2, WORD_T(0)));
}
P12(s);
XOR(s->x3, K1);
XOR(s->x4, K2);
printstate("finalization", s);
}
#if !ASCON_INLINE_MODE
#define INIT ascon_init
#define ABSORB ascon_absorb
#define ENCRYPT ascon_encrypt
#define DECRYPT ascon_decrypt
#define FINAL ascon_final
void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) {
init(s, npub, k);
}
void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) {
absorb(s, ad, adlen);
}
void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) {
encrypt(s, c, m, mlen);
}
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) {
decrypt(s, m, c, clen);
}
void ascon_final(state_t* s, const uint8_t* k) { final(s, k); }
#else
#define INIT init
#define ABSORB absorb
#define ENCRYPT encrypt
#define DECRYPT decrypt
#define FINAL final
#endif
int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m,
uint64_t mlen, const uint8_t* ad, uint64_t adlen,
const uint8_t* nsec, const uint8_t* npub,
const uint8_t* k) {
state_t s;
(void)nsec;
*clen = mlen + CRYPTO_ABYTES;
/* perform ascon computation */
INIT(&s, npub, k);
ABSORB(&s, ad, adlen);
ENCRYPT(&s, c, m, mlen);
FINAL(&s, k);
/* set tag */
c += mlen;
STORE64(c, s.x3);
STORE64(c + 8, s.x4);
return 0;
}
int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec,
const uint8_t* c, uint64_t clen, const uint8_t* ad,
uint64_t adlen, const uint8_t* npub, const uint8_t* k) {
state_t s;
(void)nsec;
if (clen < CRYPTO_ABYTES) {
*mlen = 0;
return -1;
}
*mlen = clen = clen - CRYPTO_ABYTES;
/* perform ascon computation */
INIT(&s, npub, k);
ABSORB(&s, ad, adlen);
DECRYPT(&s, m, c, clen);
FINAL(&s, k);
/* verify tag (should be constant time, check compiler output) */
c += clen;
XOR(s.x3, LOAD64(c));
XOR(s.x4, LOAD64(c + 8));
if (NOTZERO(s.x3, s.x4)) {
*mlen = 0;
return -1;
}
return 0;
}
#ifndef ASCON_H_
#define ASCON_H_
#include <stdint.h>
#include "config.h"
#include "word.h"
typedef struct {
word_t x0, x1, x2, x3, x4;
word_t rx;
} state_t;
void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k);
void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen);
void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen);
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen);
void ascon_final(state_t* s, const uint8_t* k);
#endif // ASCON_H_
#ifndef CONFIG_H_
#define CONFIG_H_
/* inline the Ascon mode */
#ifndef ASCON_INLINE_MODE
#define ASCON_INLINE_MODE 1
#endif
/* inline the Ascon permutations */
#ifndef ASCON_INLINE_PERM
#define ASCON_INLINE_PERM 1
#endif
/* single function for all permutations */
#ifndef ASCON_SINGLE_PERM
#define ASCON_SINGLE_PERM 0
#endif
/* unroll the permutation loops */
#ifndef ASCON_UNROLL_LOOPS
#define ASCON_UNROLL_LOOPS 1
#endif
/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */
#ifndef ASCON_DATA_ACCESS
#define ASCON_DATA_ACCESS 'H'
#endif
/* Ascon mask generator { 'S'tdlib, 'X'orshift, 'R'andombytes } */
#ifndef ASCON_MASK_RNG
#define ASCON_MASK_RNG 'X'
#endif
/* mask key/data loads */
#ifndef ASCON_MASK_LOADS
#define ASCON_MASK_LOADS 0
#endif
/* make sure __forceinline is supported */
#ifndef __forceinline
#define __forceinline inline __attribute__((always_inline))
#endif
#endif /* CONFIG_H_ */
#ifndef ENDIAN_H_
#define ENDIAN_H_
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
/* macros for big endian machines */
#ifndef NDEBUG
#pragma message("Using macros for big endian machines")
#endif
#define U64BIG(x) (x)
#define U32BIG(x) (x)
#define U16BIG(x) (x)
#elif defined(_MSC_VER) || \
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
/* macros for little endian machines */
#ifndef NDEBUG
#pragma message("Using macros for little endian machines")
#endif
#define U64BIG(x) \
(((0x00000000000000FFULL & (x)) << 56) | \
((0x000000000000FF00ULL & (x)) << 40) | \
((0x0000000000FF0000ULL & (x)) << 24) | \
((0x00000000FF000000ULL & (x)) << 8) | \
((0x000000FF00000000ULL & (x)) >> 8) | \
((0x0000FF0000000000ULL & (x)) >> 24) | \
((0x00FF000000000000ULL & (x)) >> 40) | \
((0xFF00000000000000ULL & (x)) >> 56))
#define U32BIG(x) \
(((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \
((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24))
#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8))
#else
#error "Ascon byte order macros not defined in endian.h"
#endif
#endif /* ENDIAN_H_ */
#ifndef LOADSTORE_H_
#define LOADSTORE_H_
#include <stdint.h>
#include "config.h"
#include "endian.h"
#include "word.h"
/* 64-bit LSB mask (undefined for n == 0) */
#define MASK(n) (~0ull >> (64 - (n)))
/* get byte from Ascon 64-bit word */
#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i))))
/* set byte in Ascon 64-bit word */
#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i)))
#if ASCON_DATA_ACCESS == 'W'
#ifndef NDEBUG
#pragma message("Using wordwise data access")
#endif
__forceinline word_t LOAD64(const uint8_t* bytes) {
uint64_t x = U64BIG(*(uint64_t*)bytes);
return U64TOWORD(x);
}
__forceinline void STORE64(uint8_t* bytes, word_t w) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes = U64BIG(x);
}
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = U64BIG(*(uint64_t*)bytes & MASK(8 * n));
return U64TOWORD(x);
}
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes &= ~MASK(8 * n);
*(uint64_t*)bytes |= U64BIG(x);
}
#elif ASCON_DATA_ACCESS == 'M'
#ifndef NDEBUG
#pragma message("Using memcpy to access data")
#endif
#include <string.h>
#define LOAD64(bytes) LOAD(bytes, 8)
#define STORE64(bytes, w) STORE(bytes, w, 8)
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = 0;
memcpy((uint8_t*)&x, bytes, n);
return U64TOWORD(U64BIG(x));
}
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = U64BIG(WORDTOU64(w));
memcpy(bytes, (uint8_t*)&x, n);
}
#elif ASCON_DATA_ACCESS == 'B'
#ifndef NDEBUG
#pragma message("Using bytewise data access")
#endif
#define LOAD64(bytes) LOAD(bytes, 8)
#define STORE64(bytes, w) STORE(bytes, w, 8)
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = 0;
for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i;
return U64TOWORD(x);
}
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i;
}
#elif ASCON_DATA_ACCESS == 'H'
#ifndef NDEBUG
#pragma message("Using hybrid data access")
#endif
#define LOAD64(bytes) LOAD(bytes, 8)
#define STORE64(bytes, w) STORE(bytes, w, 8)
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = 0;
if (n == 8)
x = U64BIG(*(uint64_t*)bytes);
else
for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i;
return U64TOWORD(x);
}
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
if (n == 8)
*(uint64_t*)bytes = U64BIG(x);
else
for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i;
}
#else
#error "Ascon data access macro not defined correctly"
#endif
#endif /* LOADSTORE_H_ */
#include "permutations.h"
#include "round.h"
#if ASCON_INLINE_PERM
#elif ASCON_SINGLE_PERM
void P(state_t* s, uint8_t rounds) {
printstate(" permutation input", s);
for (int i = START(rounds); i > 0x4a; i -= 0x0f) ROUND(s, i);
}
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */
void P12(state_t* s) { P12ROUNDS(s); }
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16
void P8(state_t* s) { P8ROUNDS(s); }
#endif
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8
void P6(state_t* s) { P6ROUNDS(s); }
#endif
#endif
#ifndef PERMUTATIONS_H_
#define PERMUTATIONS_H_
#include <stdint.h>
#include "api.h"
#include "ascon.h"
#include "printstate.h"
#include "round.h"
#define ASCON_128_KEYBYTES 16
#define ASCON_128A_KEYBYTES 16
#define ASCON_80PQ_KEYBYTES 20
#define ASCON_128_RATE 8
#define ASCON_128A_RATE 16
#define ASCON_128_PA_ROUNDS 12
#define ASCON_128_PB_ROUNDS 6
#define ASCON_128A_PB_ROUNDS 8
#define ASCON_HASH_BYTES 32
#define ASCON_128_IV \
U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \
((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_128_PB_ROUNDS) << 32))
#define ASCON_128A_IV \
U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \
((uint64_t)(ASCON_128A_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_128A_PB_ROUNDS) << 32))
#define ASCON_80PQ_IV \
U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \
((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_128_PB_ROUNDS) << 32))
#define ASCON_HASH_IV \
U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_HASH_BYTES * 8) << 0))
#define ASCON_XOF_IV \
U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40))
#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull)
#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull)
#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull)
#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull)
#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull)
#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull)
#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull)
#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull)
#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull)
#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull)
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16
#define IV ASCON_128_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 6
#define PB P6
#endif
#if ASCON_RATE == 16
#define IV ASCON_128A_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 8
#define PB P8
#endif
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20
#define IV ASCON_80PQ_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 6
#define PB P6
#endif
#define START(n) ((3 + (n)) << 4 | (12 - (n)))
#if ASCON_UNROLL_LOOPS
__forceinline void P12ROUNDS(state_t* s) {
ROUND(s, 0xf0);
ROUND(s, 0xe1);
ROUND(s, 0xd2);
ROUND(s, 0xc3);
ROUND(s, 0xb4);
ROUND(s, 0xa5);
ROUND(s, 0x96);
ROUND(s, 0x87);
ROUND(s, 0x78);
ROUND(s, 0x69);
ROUND(s, 0x5a);
ROUND(s, 0x4b);
}
__forceinline void P8ROUNDS(state_t* s) {
ROUND(s, 0xb4);
ROUND(s, 0xa5);
ROUND(s, 0x96);
ROUND(s, 0x87);
ROUND(s, 0x78);
ROUND(s, 0x69);
ROUND(s, 0x5a);
ROUND(s, 0x4b);
}
__forceinline void P6ROUNDS(state_t* s) {
ROUND(s, 0x96);
ROUND(s, 0x87);
ROUND(s, 0x78);
ROUND(s, 0x69);
ROUND(s, 0x5a);
ROUND(s, 0x4b);
}
#else /* !ASCON_UNROLL_LOOPS */
__forceinline void P12ROUNDS(state_t* s) {
for (int i = START(12); i > 0x4a; i -= 0x0f) ROUND(s, i);
}
__forceinline void P8ROUNDS(state_t* s) {
for (int i = START(8); i > 0x4a; i -= 0x0f) ROUND(s, i);
}
__forceinline void P6ROUNDS(state_t* s) {
for (int i = START(6); i > 0x4a; i -= 0x0f) ROUND(s, i);
}
#endif
#if ASCON_INLINE_PERM
__forceinline void P12(state_t* s) {
printstate(" permutation input", s);
P12ROUNDS(s);
}
__forceinline void P8(state_t* s) {
printstate(" permutation input", s);
P8ROUNDS(s);
}
__forceinline void P6(state_t* s) {
printstate(" permutation input", s);
P6ROUNDS(s);
}
__forceinline void P(state_t* s, int i) {
if (i == 12) P12(s);
if (i == 8) P8(s);
if (i == 6) P6(s);
}
#elif ASCON_SINGLE_PERM
#define P12(s) P(s, 12)
#define P8(s) P(s, 8)
#define P6(s) P(s, 6)
void P(state_t* s, uint8_t rounds);
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */
void P12(state_t* s);
void P8(state_t* s);
void P6(state_t* s);
__forceinline void P(state_t* s, int i) {
if (i == 12) P12(s);
if (i == 8) P8(s);
if (i == 6) P6(s);
}
#endif
#endif /* PERMUTATIONS_H_ */
#ifndef PRINTSTATE_H_
#define PRINTSTATE_H_
#ifdef NDEBUG
#define printword(text, w)
#define printstate(text, s)
#else
#include <inttypes.h>
#include <stdio.h>
#include "ascon.h"
__forceinline void printword(const char* text, const word_t x) {
printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x));
}
__forceinline void printstate(const char* text, const state_t* s) {
printf("%s:\n", text);
printword(" x0", s->x0);
printword(" x1", s->x1);
printword(" x2", s->x2);
printword(" x3", s->x3);
printword(" x4", s->x4);
}
#endif
#endif /* PRINTSTATE_H_ */
#include "random.h"
#include <stdlib.h>
#include <time.h>
#include "config.h"
#if ASCON_MASK_RNG == 'R'
void randinit() { srand(time(0)); }
uint32_t rand32() {
uint32_t r;
randombytes(&r, 4);
return r;
}
uint64_t rand64() {
uint64_t r;
randombytes(&r, 8);
return r;
}
#elif ASCON_MASK_RNG == 'S'
void randinit() { srand(time(0)); }
uint32_t rand32() { return ((uint32_t)rand() << 21) ^ rand(); }
uint64_t rand64() {
return ((uint64_t)rand() << 43) ^ ((uint64_t)rand() << 21) ^ rand();
}
#elif ASCON_MASK_RNG == 'X'
uint32_t xorshift32;
uint64_t xorshift64;
void randinit() {
srand(time(0));
xorshift32 = rand();
xorshift64 = (uint64_t)rand() << 32 | rand();
}
uint32_t rand32() {
uint32_t x = xorshift32;
x ^= x << 13;
x ^= x >> 17;
x ^= x << 5;
return xorshift32 = x;
}
uint64_t rand64() {
uint64_t x = xorshift64;
x ^= x << 13;
x ^= x >> 7;
x ^= x << 17;
return xorshift64 = x;
}
#endif
#ifndef RANDOM_H_
#define RANDOM_H_
#include <stdint.h>
void randinit();
uint32_t rand32();
uint64_t rand64();
#endif /* RANDOM_H_ */
#ifndef ROUND_H_
#define ROUND_H_
#include "ascon.h"
#include "printstate.h"
#include "random.h"
__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) {
*K0 = TOSHARES(0);
*K1 = TOSHARES(0);
*K2 = TOSHARES(0);
}
__forceinline void PINIT(state_t* s) {
randinit();
s->x0 = TOSHARES(0);
s->x1 = TOSHARES(0);
s->x2 = TOSHARES(0);
s->x3 = TOSHARES(0);
s->x4 = TOSHARES(0);
s->rx = TOSHARES(0);
}
#define TOFFOLI(a0, a1, a2, b0, b1, b2, c0, c1, c2) \
do { \
(a0) ^= (~(b0)) & (c0); \
(a0) ^= (b0) & (c2); \
(a0) ^= (b2) & (c0); \
(a1) ^= (~(b1)) & (c1); \
(a1) ^= (b1) & (c0); \
(a1) ^= (b0) & (c1); \
(a2) ^= (~(b2)) & (c2); \
(a2) ^= (b2) & (c1); \
(a2) ^= (b1) & (c2); \
} while (0)
__forceinline void ROUND(state_t* s, uint64_t C) {
/* refresh randomness */
/* s->rx = TOSHARES(0); */
/* addition of round constant */
s->x2.s0 ^= C;
/* substitution layer */
s->x0.s0 ^= s->x4.s0;
s->x4.s0 ^= s->x3.s0;
s->x2.s0 ^= s->x1.s0;
s->x0.s1 ^= s->x4.s1;
s->x4.s1 ^= s->x3.s1;
s->x2.s1 ^= s->x1.s1;
s->x0.s2 ^= s->x4.s2;
s->x4.s2 ^= s->x3.s2;
s->x2.s2 ^= s->x1.s2;
/* start of shared keccak s-box from https://eprint.iacr.org/2019/536 */
s->rx.s2 = s->rx.s0;
s->rx.s0 ^= s->rx.s1;
TOFFOLI(s->rx.s0, s->rx.s1, s->rx.s2, s->x4.s0, s->x4.s1, s->x4.s2, s->x0.s0,
s->x0.s1, s->x0.s2);
TOFFOLI(s->x0.s0, s->x0.s1, s->x0.s2, s->x1.s0, s->x1.s1, s->x1.s2, s->x2.s0,
s->x2.s1, s->x2.s2);
TOFFOLI(s->x2.s0, s->x2.s1, s->x2.s2, s->x3.s0, s->x3.s1, s->x3.s2, s->x4.s0,
s->x4.s1, s->x4.s2);
TOFFOLI(s->x4.s0, s->x4.s1, s->x4.s2, s->x0.s0, s->x0.s1, s->x0.s2, s->x1.s0,
s->x1.s1, s->x1.s2);
TOFFOLI(s->x1.s0, s->x1.s1, s->x1.s2, s->x2.s0, s->x2.s1, s->x2.s2, s->x3.s0,
s->x3.s1, s->x3.s2);
s->x3.s2 ^= s->rx.s2;
s->x3.s1 ^= s->rx.s1;
s->x3.s0 ^= s->rx.s0;
/* end of shared keccak s-box */
s->x1.s0 ^= s->x0.s0;
s->x0.s0 ^= s->x4.s0;
s->x3.s0 ^= s->x2.s0;
s->x2.s0 = ~s->x2.s0;
s->x1.s1 ^= s->x0.s1;
s->x0.s1 ^= s->x4.s1;
s->x3.s1 ^= s->x2.s1;
s->x1.s2 ^= s->x0.s2;
s->x0.s2 ^= s->x4.s2;
s->x3.s2 ^= s->x2.s2;
/* linear diffusion layer */
s->x0.s2 ^= ROR64(s->x0.s2, 19) ^ ROR64(s->x0.s2, 28);
s->x1.s2 ^= ROR64(s->x1.s2, 61) ^ ROR64(s->x1.s2, 39);
s->x2.s2 ^= ROR64(s->x2.s2, 1) ^ ROR64(s->x2.s2, 6);
s->x3.s2 ^= ROR64(s->x3.s2, 10) ^ ROR64(s->x3.s2, 17);
s->x4.s2 ^= ROR64(s->x4.s2, 7) ^ ROR64(s->x4.s2, 41);
s->x0.s1 ^= ROR64(s->x0.s1, 19) ^ ROR64(s->x0.s1, 28);
s->x1.s1 ^= ROR64(s->x1.s1, 61) ^ ROR64(s->x1.s1, 39);
s->x2.s1 ^= ROR64(s->x2.s1, 1) ^ ROR64(s->x2.s1, 6);
s->x3.s1 ^= ROR64(s->x3.s1, 10) ^ ROR64(s->x3.s1, 17);
s->x4.s1 ^= ROR64(s->x4.s1, 7) ^ ROR64(s->x4.s1, 41);
s->x0.s0 ^= ROR64(s->x0.s0, 19) ^ ROR64(s->x0.s0, 28);
s->x1.s0 ^= ROR64(s->x1.s0, 61) ^ ROR64(s->x1.s0, 39);
s->x2.s0 ^= ROR64(s->x2.s0, 1) ^ ROR64(s->x2.s0, 6);
s->x3.s0 ^= ROR64(s->x3.s0, 10) ^ ROR64(s->x3.s0, 17);
s->x4.s0 ^= ROR64(s->x4.s0, 7) ^ ROR64(s->x4.s0, 41);
printstate(" round output", s);
}
#endif /* ROUND_H_ */
#ifndef WORD_H_
#define WORD_H_
#include <stdint.h>
#include "config.h"
#include "random.h"
typedef uint64_t share_t;
typedef struct {
share_t s0;
share_t s1;
share_t s2;
} word_t;
__forceinline word_t WORD_T(uint64_t x) { return (word_t){x, 0, 0}; }
__forceinline uint64_t UINT64_T(word_t w) { return w.s0; }
__forceinline word_t TOSHARES(share_t in) {
uint64_t r0 = rand64();
uint64_t r1 = rand64();
return (word_t){in ^ r0 ^ r1, r0, r1};
}
__forceinline share_t FROMSHARES(word_t in) { return in.s0 ^ in.s1 ^ in.s2; }
__forceinline word_t U64TOWORD(uint64_t x) {
#if ASCON_MASK_LOADS
return TOSHARES(x);
#else
return WORD_T(x);
#endif
}
__forceinline uint64_t WORDTOU64(word_t w) { return FROMSHARES(w); }
#define XOR(a, b) \
do { \
word_t t = b; \
(a).s0 ^= t.s0; \
(a).s1 ^= t.s1; \
(a).s2 ^= t.s2; \
} while (0)
#define AND(a, b) \
do { \
word_t ta = a; \
word_t tb = b; \
(a).s0 = (ta.s0 & tb.s0) ^ (ta.s0 & tb.s1) ^ (ta.s0 & tb.s2); \
(a).s1 = (ta.s1 & tb.s0) ^ (ta.s1 & tb.s1) ^ (ta.s1 & tb.s2); \
(a).s2 = (ta.s2 & tb.s0) ^ (ta.s2 & tb.s1) ^ (ta.s2 & tb.s2); \
} while (0)
__forceinline uint64_t ROR64(uint64_t x, int n) {
return x >> n | x << (64 - n);
}
__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) {
word_t r;
r.s0 = lo2hi.s0 << 32 | hi2lo.s0 >> 32;
r.s1 = lo2hi.s1 << 32 | hi2lo.s1 >> 32;
r.s2 = lo2hi.s2 << 32 | hi2lo.s2 >> 32;
return r;
}
__forceinline int NOTZERO(word_t a, word_t b) {
int result = 0;
for (int i = 0; i < 8; ++i)
result |= ((uint8_t*)&(a.s0))[i] ^ ((uint8_t*)&(a.s1))[i] ^
((uint8_t*)&(a.s2))[i];
for (int i = 0; i < 8; ++i)
result |= ((uint8_t*)&(b.s0))[i] ^ ((uint8_t*)&(b.s1))[i] ^
((uint8_t*)&(b.s2))[i];
return result;
}
/* set padding byte in 64-bit Ascon word */
__forceinline word_t PAD(int i) { return WORD_T(0x80ull << (56 - 8 * i)); }
/* byte mask for 64-bit Ascon word (1 <= n <= 8) */
__forceinline word_t XMASK(int n) {
return WORD_T(0x00ffffffffffffffull >> (n * 8 - 8));
}
#endif /* WORD_H_ */
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#define ASCON_RATE 8
#include "ascon.h"
#include "api.h"
#include "loadstore.h"
#include "permutations.h"
#include "printstate.h"
__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2,
const uint8_t* k) {
KINIT(K0, K1, K2);
if (CRYPTO_KEYBYTES == 20) {
XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4)));
k += 4;
}
XOR(*K1, LOAD64(k));
XOR(*K2, LOAD64(k + 8));
}
__forceinline void init(state_t* s, const uint8_t* npub, const uint8_t* k) {
word_t N0, N1;
word_t K0, K1, K2;
/* load nonce */
N0 = LOAD64(npub);
N1 = LOAD64(npub + 8);
/* load key */
loadkey(&K0, &K1, &K2, k);
/* initialization */
PINIT(s);
XOR(s->x0, IV);
if (CRYPTO_KEYBYTES == 20) XOR(s->x0, K0);
XOR(s->x1, K1);
XOR(s->x2, K2);
XOR(s->x3, N0);
XOR(s->x4, N1);
P12(s);
if (CRYPTO_KEYBYTES == 20) XOR(s->x2, K0);
XOR(s->x3, K1);
XOR(s->x4, K2);
printstate("initialization", s);
}
__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) {
word_t* restrict px;
/* process associated data */
if (adlen) {
while (adlen >= ASCON_RATE) {
XOR(s->x0, LOAD64(ad));
if (ASCON_RATE == 16) XOR(s->x1, LOAD64(ad + 8));
PB(s);
ad += ASCON_RATE;
adlen -= ASCON_RATE;
}
/* final associated data block */
px = &s->x0;
if (ASCON_RATE == 16 && adlen >= 8) {
XOR(s->x0, LOAD64(ad));
px = &s->x1;
ad += 8;
adlen -= 8;
}
if (adlen) XOR(*px, LOAD(ad, adlen));
XOR(*px, PAD(adlen));
PB(s);
}
XOR(s->x4, WORD_T(1));
printstate("process associated data", s);
}
__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m,
uint64_t mlen) {
word_t* restrict px;
/* process plaintext */
while (mlen >= ASCON_RATE) {
XOR(s->x0, LOAD64(m));
STORE64(c, s->x0);
if (ASCON_RATE == 16) {
XOR(s->x1, LOAD64(m + 8));
STORE64(c + 8, s->x1);
}
PB(s);
m += ASCON_RATE;
c += ASCON_RATE;
mlen -= ASCON_RATE;
}
/* final plaintext block */
px = &s->x0;
if (ASCON_RATE == 16 && mlen >= 8) {
XOR(s->x0, LOAD64(m));
STORE64(c, s->x0);
px = &s->x1;
m += 8;
c += 8;
mlen -= 8;
}
if (mlen) {
XOR(*px, LOAD(m, mlen));
STORE(c, *px, mlen);
}
XOR(*px, PAD(mlen));
printstate("process plaintext", s);
}
__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c,
uint64_t clen) {
word_t* restrict px;
word_t cx;
/* process ciphertext */
while (clen >= ASCON_RATE) {
cx = LOAD64(c);
XOR(s->x0, cx);
STORE64(m, s->x0);
s->x0 = cx;
if (ASCON_RATE == 16) {
cx = LOAD64(c + 8);
XOR(s->x1, cx);
STORE64(m + 8, s->x1);
s->x1 = cx;
}
PB(s);
m += ASCON_RATE;
c += ASCON_RATE;
clen -= ASCON_RATE;
}
/* final ciphertext block */
px = &s->x0;
if (ASCON_RATE == 16 && clen >= 8) {
cx = LOAD64(c);
XOR(s->x0, cx);
STORE64(m, s->x0);
s->x0 = cx;
px = &s->x1;
m += 8;
c += 8;
clen -= 8;
}
if (clen) {
cx = LOAD(c, clen);
XOR(*px, cx);
STORE(m, *px, clen);
AND(*px, XMASK(clen));
XOR(*px, cx);
}
XOR(*px, PAD(clen));
printstate("process ciphertext", s);
}
__forceinline void final(state_t* s, const uint8_t* k) {
word_t K0, K1, K2;
/* load key */
loadkey(&K0, &K1, &K2, k);
/* finalization */
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) {
XOR(s->x1, K1);
XOR(s->x2, K2);
}
if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) {
XOR(s->x2, K1);
XOR(s->x3, K2);
}
if (CRYPTO_KEYBYTES == 20) {
XOR(s->x1, KEYROT(K0, K1));
XOR(s->x2, KEYROT(K1, K2));
XOR(s->x3, KEYROT(K2, WORD_T(0)));
}
P12(s);
XOR(s->x3, K1);
XOR(s->x4, K2);
printstate("finalization", s);
}
#if !ASCON_INLINE_MODE
#define INIT ascon_init
#define ABSORB ascon_absorb
#define ENCRYPT ascon_encrypt
#define DECRYPT ascon_decrypt
#define FINAL ascon_final
void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) {
init(s, npub, k);
}
void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) {
absorb(s, ad, adlen);
}
void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) {
encrypt(s, c, m, mlen);
}
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) {
decrypt(s, m, c, clen);
}
void ascon_final(state_t* s, const uint8_t* k) { final(s, k); }
#else
#define INIT init
#define ABSORB absorb
#define ENCRYPT encrypt
#define DECRYPT decrypt
#define FINAL final
#endif
int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m,
uint64_t mlen, const uint8_t* ad, uint64_t adlen,
const uint8_t* nsec, const uint8_t* npub,
const uint8_t* k) {
state_t s;
(void)nsec;
*clen = mlen + CRYPTO_ABYTES;
/* perform ascon computation */
INIT(&s, npub, k);
ABSORB(&s, ad, adlen);
ENCRYPT(&s, c, m, mlen);
FINAL(&s, k);
/* set tag */
c += mlen;
STORE64(c, s.x3);
STORE64(c + 8, s.x4);
return 0;
}
int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec,
const uint8_t* c, uint64_t clen, const uint8_t* ad,
uint64_t adlen, const uint8_t* npub, const uint8_t* k) {
state_t s;
(void)nsec;
if (clen < CRYPTO_ABYTES) {
*mlen = 0;
return -1;
}
*mlen = clen = clen - CRYPTO_ABYTES;
/* perform ascon computation */
INIT(&s, npub, k);
ABSORB(&s, ad, adlen);
DECRYPT(&s, m, c, clen);
FINAL(&s, k);
/* verify tag (should be constant time, check compiler output) */
c += clen;
XOR(s.x3, LOAD64(c));
XOR(s.x4, LOAD64(c + 8));
if (NOTZERO(s.x3, s.x4)) {
*mlen = 0;
return -1;
}
return 0;
}
#ifndef ASCON_H_
#define ASCON_H_
#include <stdint.h>
#include "config.h"
#include "word.h"
typedef struct {
word_t x0, x1, x2, x3, x4;
word_t rx;
} state_t;
void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k);
void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen);
void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen);
void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen);
void ascon_final(state_t* s, const uint8_t* k);
#endif // ASCON_H_
#ifndef CONFIG_H_
#define CONFIG_H_
/* inline the Ascon mode */
#ifndef ASCON_INLINE_MODE
#define ASCON_INLINE_MODE 1
#endif
/* inline the Ascon permutations */
#ifndef ASCON_INLINE_PERM
#define ASCON_INLINE_PERM 0
#endif
/* single function for all permutations */
#ifndef ASCON_SINGLE_PERM
#define ASCON_SINGLE_PERM 0
#endif
/* unroll the permutation loops */
#ifndef ASCON_UNROLL_LOOPS
#define ASCON_UNROLL_LOOPS 0
#endif
/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */
#ifndef ASCON_DATA_ACCESS
#define ASCON_DATA_ACCESS 'B'
#endif
/* Ascon mask generator { 'S'tdlib, 'X'orshift, 'R'andombytes } */
#ifndef ASCON_MASK_RNG
#define ASCON_MASK_RNG 'X'
#endif
/* mask key/data loads */
#ifndef ASCON_MASK_LOADS
#define ASCON_MASK_LOADS 0
#endif
/* make sure __forceinline is supported */
#ifndef __forceinline
#define __forceinline inline __attribute__((always_inline))
#endif
#endif /* CONFIG_H_ */
#ifndef ENDIAN_H_
#define ENDIAN_H_
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
/* macros for big endian machines */
#ifndef NDEBUG
#pragma message("Using macros for big endian machines")
#endif
#define U64BIG(x) (x)
#define U32BIG(x) (x)
#define U16BIG(x) (x)
#elif defined(_MSC_VER) || \
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
/* macros for little endian machines */
#ifndef NDEBUG
#pragma message("Using macros for little endian machines")
#endif
#define U64BIG(x) \
(((0x00000000000000FFULL & (x)) << 56) | \
((0x000000000000FF00ULL & (x)) << 40) | \
((0x0000000000FF0000ULL & (x)) << 24) | \
((0x00000000FF000000ULL & (x)) << 8) | \
((0x000000FF00000000ULL & (x)) >> 8) | \
((0x0000FF0000000000ULL & (x)) >> 24) | \
((0x00FF000000000000ULL & (x)) >> 40) | \
((0xFF00000000000000ULL & (x)) >> 56))
#define U32BIG(x) \
(((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \
((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24))
#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8))
#else
#error "Ascon byte order macros not defined in endian.h"
#endif
#endif /* ENDIAN_H_ */
#ifndef LOADSTORE_H_
#define LOADSTORE_H_
#include <stdint.h>
#include "config.h"
#include "endian.h"
#include "word.h"
/* 64-bit LSB mask (undefined for n == 0) */
#define MASK(n) (~0ull >> (64 - (n)))
/* get byte from Ascon 64-bit word */
#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i))))
/* set byte in Ascon 64-bit word */
#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i)))
#if ASCON_DATA_ACCESS == 'W'
#ifndef NDEBUG
#pragma message("Using wordwise data access")
#endif
__forceinline word_t LOAD64(const uint8_t* bytes) {
uint64_t x = U64BIG(*(uint64_t*)bytes);
return U64TOWORD(x);
}
__forceinline void STORE64(uint8_t* bytes, word_t w) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes = U64BIG(x);
}
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = U64BIG(*(uint64_t*)bytes & MASK(8 * n));
return U64TOWORD(x);
}
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
*(uint64_t*)bytes &= ~MASK(8 * n);
*(uint64_t*)bytes |= U64BIG(x);
}
#elif ASCON_DATA_ACCESS == 'M'
#ifndef NDEBUG
#pragma message("Using memcpy to access data")
#endif
#include <string.h>
#define LOAD64(bytes) LOAD(bytes, 8)
#define STORE64(bytes, w) STORE(bytes, w, 8)
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = 0;
memcpy((uint8_t*)&x, bytes, n);
return U64TOWORD(U64BIG(x));
}
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = U64BIG(WORDTOU64(w));
memcpy(bytes, (uint8_t*)&x, n);
}
#elif ASCON_DATA_ACCESS == 'B'
#ifndef NDEBUG
#pragma message("Using bytewise data access")
#endif
#define LOAD64(bytes) LOAD(bytes, 8)
#define STORE64(bytes, w) STORE(bytes, w, 8)
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = 0;
for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i;
return U64TOWORD(x);
}
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i;
}
#elif ASCON_DATA_ACCESS == 'H'
#ifndef NDEBUG
#pragma message("Using hybrid data access")
#endif
#define LOAD64(bytes) LOAD(bytes, 8)
#define STORE64(bytes, w) STORE(bytes, w, 8)
__forceinline word_t LOAD(const uint8_t* bytes, int n) {
uint64_t x = 0;
if (n == 8)
x = U64BIG(*(uint64_t*)bytes);
else
for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i;
return U64TOWORD(x);
}
__forceinline void STORE(uint8_t* bytes, word_t w, int n) {
uint64_t x = WORDTOU64(w);
if (n == 8)
*(uint64_t*)bytes = U64BIG(x);
else
for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i;
}
#else
#error "Ascon data access macro not defined correctly"
#endif
#endif /* LOADSTORE_H_ */
#include "permutations.h"
#include "round.h"
#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM
const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9},
{0x6, 0xc}, {0x3, 0xc}, {0x6, 0x9}, {0x3, 0x9},
{0xc, 0x6}, {0x9, 0x6}, {0xc, 0x3}, {0x9, 0x3}};
#endif
#if ASCON_INLINE_PERM
#elif ASCON_SINGLE_PERM
void P(state_t* s, uint8_t rounds) {
printstate(" permutation input", s);
for (int i = START(rounds); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
}
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */
void P12(state_t* s) { P12ROUNDS(s); }
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16
void P8(state_t* s) { P8ROUNDS(s); }
#endif
#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8
void P6(state_t* s) { P6ROUNDS(s); }
#endif
#endif
#ifndef PERMUTATIONS_H_
#define PERMUTATIONS_H_
#include <stdint.h>
#include "api.h"
#include "ascon.h"
#include "printstate.h"
#include "round.h"
#define ASCON_128_KEYBYTES 16
#define ASCON_128A_KEYBYTES 16
#define ASCON_80PQ_KEYBYTES 20
#define ASCON_128_RATE 8
#define ASCON_128A_RATE 16
#define ASCON_128_PA_ROUNDS 12
#define ASCON_128_PB_ROUNDS 6
#define ASCON_128A_PB_ROUNDS 8
#define ASCON_HASH_BYTES 32
#define ASCON_128_IV \
U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \
((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_128_PB_ROUNDS) << 32))
#define ASCON_128A_IV \
U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \
((uint64_t)(ASCON_128A_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_128A_PB_ROUNDS) << 32))
#define ASCON_80PQ_IV \
U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \
((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_128_PB_ROUNDS) << 32))
#define ASCON_HASH_IV \
U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \
((uint64_t)(ASCON_HASH_BYTES * 8) << 0))
#define ASCON_XOF_IV \
U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \
((uint64_t)(ASCON_128_PA_ROUNDS) << 40))
#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull)
#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull)
#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull)
#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull)
#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull)
#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull)
#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull)
#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull)
#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull)
#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull)
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16
#define IV ASCON_128_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 6
#define PB P6
#endif
#if ASCON_RATE == 16
#define IV ASCON_128A_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 8
#define PB P8
#endif
#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20
#define IV ASCON_80PQ_IV
#define PA_ROUNDS 12
#define PB_ROUNDS 6
#define PB P6
#endif
#define START(n) (12 - n)
#if ASCON_UNROLL_LOOPS
__forceinline void P12ROUNDS(state_t* s) {
ROUND(s, 0xc, 0xc);
ROUND(s, 0x9, 0xc);
ROUND(s, 0xc, 0x9);
ROUND(s, 0x9, 0x9);
ROUND(s, 0x6, 0xc);
ROUND(s, 0x3, 0xc);
ROUND(s, 0x6, 0x9);
ROUND(s, 0x3, 0x9);
ROUND(s, 0xc, 0x6);
ROUND(s, 0x9, 0x6);
ROUND(s, 0xc, 0x3);
ROUND(s, 0x9, 0x3);
}
__forceinline void P8ROUNDS(state_t* s) {
ROUND(s, 0x6, 0xc);
ROUND(s, 0x3, 0xc);
ROUND(s, 0x6, 0x9);
ROUND(s, 0x3, 0x9);
ROUND(s, 0xc, 0x6);
ROUND(s, 0x9, 0x6);
ROUND(s, 0xc, 0x3);
ROUND(s, 0x9, 0x3);
}
__forceinline void P6ROUNDS(state_t* s) {
ROUND(s, 0x6, 0x9);
ROUND(s, 0x3, 0x9);
ROUND(s, 0xc, 0x6);
ROUND(s, 0x9, 0x6);
ROUND(s, 0xc, 0x3);
ROUND(s, 0x9, 0x3);
}
#else /* !ASCON_UNROLL_LOOPS */
extern const uint8_t constants[][2];
__forceinline void P12ROUNDS(state_t* s) {
for (int i = START(12); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
}
__forceinline void P8ROUNDS(state_t* s) {
for (int i = START(8); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
}
__forceinline void P6ROUNDS(state_t* s) {
for (int i = START(6); i < 12; i++)
ROUND(s, constants[i][0], constants[i][1]);
}
#endif
#if ASCON_INLINE_PERM
__forceinline void P12(state_t* s) {
printstate(" permutation input", s);
P12ROUNDS(s);
}
__forceinline void P8(state_t* s) {
printstate(" permutation input", s);
P8ROUNDS(s);
}
__forceinline void P6(state_t* s) {
printstate(" permutation input", s);
P6ROUNDS(s);
}
__forceinline void P(state_t* s, int i) {
if (i == 12) P12(s);
if (i == 8) P8(s);
if (i == 6) P6(s);
}
#elif ASCON_SINGLE_PERM
#define P12(s) P(s, 12)
#define P8(s) P(s, 8)
#define P6(s) P(s, 6)
void P(state_t* s, uint8_t rounds);
#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */
void P12(state_t* s);
void P8(state_t* s);
void P6(state_t* s);
__forceinline void P(state_t* s, int i) {
if (i == 12) P12(s);
if (i == 8) P8(s);
if (i == 6) P6(s);
}
#endif
#endif /* PERMUTATIONS_H_ */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment