Commit 9c6d9e4a by Ferdinand Bachmann Committed by Sebastian Renner

ascon

parent 4063295f
/* AEAD parameter sizes, in bytes. */
/* Key length; also enters the IV as a bit count (8 * CRYPTO_KEYBYTES). */
#define CRYPTO_KEYBYTES 16
/* No secret message number: the nsec arguments are ignored by encrypt/decrypt. */
#define CRYPTO_NSECBYTES 0
/* Public nonce length; ascon_core loads it as two 64-bit words. */
#define CRYPTO_NPUBBYTES 16
/* Tag length appended to the ciphertext (clen = mlen + CRYPTO_ABYTES). */
#define CRYPTO_ABYTES 16
/* NOTE(review): not referenced by the code in this file; presumably tells the
 * surrounding framework that input and output buffers do not overlap - confirm. */
#define CRYPTO_NOOVERLAP 1
#include <string.h>
#include "core.h"
/*
 * Absorb `len` bytes from `in` into the rate words (x0, x1) of the
 * bit-interleaved state `s`.
 *
 * Input is consumed in RATE-byte blocks; after every full block the state is
 * permuted with P(s, START_ROUND(PB_ROUNDS)). The remaining len % RATE bytes
 * (possibly none) are followed by a single 0x80 byte and zero padding and are
 * absorbed without a trailing permutation.
 *
 * mode selects what happens in addition to absorbing:
 *   ASCON_AD  - nothing; `out` is never dereferenced or advanced (callers
 *               pass a null pointer).
 *   ASCON_ENC - the rate after the XOR is written to `out`.
 *   ASCON_DEC - the rate after the XOR is written to `out`, then the rate
 *               bytes covered by input are replaced by the input itself.
 *
 * All buffer traffic goes through local u64 staging words via memcpy, so
 * `in` and `out` need no particular alignment and no u64 lvalue is ever
 * formed over caller memory. Because the input block is copied before
 * anything is written, `out` may be the same buffer as `in`.
 */
void process_data(state* s, unsigned char* out, const unsigned char* in,
                  unsigned long long len, u8 mode) {
  u32_2 t0, t1;
  u64 block[2]; /* current input block, exactly RATE (16) bytes */
  u64 rate[2];  /* de-interleaved rate words after absorbing the block */

  while (len >= RATE) {
    memcpy(block, in, RATE);
    t0 = to_bit_interleaving_big(block[0]);
    s->x0.e ^= t0.e;
    s->x0.o ^= t0.o;
    t1 = to_bit_interleaving_big(block[1]);
    s->x1.e ^= t1.e;
    s->x1.o ^= t1.o;
    if (mode != ASCON_AD) {
      rate[0] = from_bit_interleaving_big(s->x0);
      rate[1] = from_bit_interleaving_big(s->x1);
      memcpy(out, rate, RATE);
      out += RATE; /* only advanced when it is a real buffer */
    }
    if (mode == ASCON_DEC) {
      /* the rate becomes the ciphertext block itself */
      s->x0 = t0;
      s->x1 = t1;
    }
    P(s, START_ROUND(PB_ROUNDS));
    in += RATE;
    len -= RATE;
  }

  /* final block: len < RATE input bytes, then 0x80, then zeros */
  block[0] = 0;
  block[1] = 0;
  if (len) {
    memcpy(block, in, len);
  }
  ((u8*)block)[len] = 0x80;

  t0 = to_bit_interleaving_big(block[0]);
  s->x0.e ^= t0.e;
  s->x0.o ^= t0.o;
  t1 = to_bit_interleaving_big(block[1]);
  s->x1.e ^= t1.e;
  s->x1.o ^= t1.o;

  if (mode != ASCON_AD) {
    rate[0] = from_bit_interleaving_big(s->x0);
    rate[1] = from_bit_interleaving_big(s->x1);
    if (len) {
      memcpy(out, rate, len);
    }
    if (mode == ASCON_DEC) {
      /* overwrite the first len rate bytes with the ciphertext, taken from
       * the private copy so a write to `out` cannot have disturbed it */
      memcpy(rate, block, len);
      s->x0 = to_bit_interleaving_big(rate[0]);
      s->x1 = to_bit_interleaving_big(rate[1]);
    }
  }
}
/*
 * Run the keyed sponge over associated data and message/ciphertext, leaving
 * the result in `s`; callers read the tag from s->x3 and s->x4.
 *
 *   s     - state to initialise and update (bit-interleaved words)
 *   out   - receives tlen bytes of ciphertext (ASCON_ENC) or plaintext
 *           (ASCON_DEC)
 *   in    - tlen bytes of plaintext / ciphertext
 *   ad    - adlen bytes of associated data (skipped entirely when adlen == 0)
 *   npub  - 16-byte nonce
 *   k     - 16-byte key
 *   mode  - ASCON_ENC or ASCON_DEC, forwarded to process_data
 *
 * Key and nonce are copied into local words with memcpy, so `k` and `npub`
 * need no particular alignment.
 */
void ascon_core(state* s, unsigned char* out, const unsigned char* in,
                unsigned long long tlen, const unsigned char* ad,
                unsigned long long adlen, const unsigned char* npub,
                const unsigned char* k, u8 mode) {
  u32_2 K0, K1, N0, N1;
  u64 key_words[2];
  u64 nonce_words[2];

  // load key and nonce
  memcpy(key_words, k, sizeof key_words);
  memcpy(nonce_words, npub, sizeof nonce_words);
  K0 = to_bit_interleaving_big(key_words[0]);
  K1 = to_bit_interleaving_big(key_words[1]);
  N0 = to_bit_interleaving_big(nonce_words[0]);
  N1 = to_bit_interleaving_big(nonce_words[1]);

  // initialization: x0 = IV, x1..x2 = key, x3..x4 = nonce
  to_bit_interleaving_big_immediate(s->x0, IV);
  s->x1.o = K0.o;
  s->x1.e = K0.e;
  s->x2.e = K1.e;
  s->x2.o = K1.o;
  s->x3.e = N0.e;
  s->x3.o = N0.o;
  s->x4.e = N1.e;
  s->x4.o = N1.o;
  P(s, START_ROUND(PA_ROUNDS));
  s->x3.e ^= K0.e;
  s->x3.o ^= K0.o;
  s->x4.e ^= K1.e;
  s->x4.o ^= K1.o;

  // process associated data
  if (adlen) {
    process_data(s, (void*)0, ad, adlen, ASCON_AD);
    P(s, START_ROUND(PB_ROUNDS));
  }
  // flip one bit of x4 between the AD phase and the text phase
  s->x4.e ^= 1;

  // process plaintext/ciphertext
  process_data(s, out, in, tlen, mode);

  // finalization: key into x2..x3, full permutation, key into x3..x4
  s->x2.e ^= K0.e;
  s->x2.o ^= K0.o;
  s->x3.e ^= K1.e;
  s->x3.o ^= K1.o;
  P(s, START_ROUND(PA_ROUNDS));
  s->x3.e ^= K0.e;
  s->x3.o ^= K0.o;
  s->x4.e ^= K1.e;
  s->x4.o ^= K1.o;
}
#ifndef CORE_H_
#define CORE_H_
#include "api.h"
#include "endian.h"
#include "permutations.h"
/* Values of the `mode` argument of process_data / ascon_core. */
#define ASCON_AD 0
#define ASCON_ENC 1
#define ASCON_DEC 2
/* Bytes absorbed per block (128 bits). */
#define RATE (128 / 8)
/* Round counts; call sites pass them through START_ROUND() before calling P. */
#define PA_ROUNDS 12
#define PB_ROUNDS 8
/*
 * Initial value of x0: key bits, rate bits, PA_ROUNDS and PB_ROUNDS packed
 * into the four LOW bytes. ascon_core feeds it through
 * to_bit_interleaving_big_immediate, which applies U64BIG first; on a
 * little-endian host that byte swap moves these fields to the top four bytes.
 * NOTE(review): on a big-endian host U64BIG is the identity, so the fields
 * would stay in the low bytes - confirm only little-endian targets are built.
 */
#define IV \
((u64)(8 * (CRYPTO_KEYBYTES)) << 0 | (u64)(8 * (RATE)) << 8 | \
(u64)(PA_ROUNDS) << 16 | (u64)(PB_ROUNDS) << 24)
/* Absorb len bytes of `in` into the rate of `s`; writes to `out` unless mode
 * is ASCON_AD. */
void process_data(state* s, unsigned char* out, const unsigned char* in,
unsigned long long len, u8 mode);
/* Initialise `s` from key and nonce, process ad and in, and finalise; the tag
 * is left in s->x3 / s->x4. */
void ascon_core(state* s, unsigned char* out, const unsigned char* in,
unsigned long long tlen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k, u8 mode);
#endif // CORE_H_
#include "core.h"
int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen,
unsigned char* nsec, const unsigned char* c,
unsigned long long clen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k) {
if (clen < CRYPTO_ABYTES) {
*mlen = 0;
return -1;
}
state s;
u32_2 t0, t1;
(void)nsec;
// set plaintext size
*mlen = clen - CRYPTO_ABYTES;
ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DEC);
// verify tag (should be constant time, check compiler output)
t0 = to_bit_interleaving_big((*(u64*)(c + *mlen)));
t1 = to_bit_interleaving_big((*(u64*)(c + *mlen + 8)));
if (((s.x3.e ^ t0.e) | (s.x3.o ^ t0.o) | (s.x4.e ^ t1.e) | (s.x4.o ^ t1.o)) !=
0) {
*mlen = 0;
return -1;
}
return 0;
}
#include "core.h"
/*
 * Encrypt mlen bytes at `m` into `c` and append the CRYPTO_ABYTES tag.
 *
 * Sets *clen = mlen + CRYPTO_ABYTES and always returns 0. `c` must have room
 * for *clen bytes. `nsec` is unused.
 *
 * The tag is written byte-wise from local words, so c + mlen may have any
 * alignment.
 */
int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen,
                        const unsigned char* m, unsigned long long mlen,
                        const unsigned char* ad, unsigned long long adlen,
                        const unsigned char* nsec, const unsigned char* npub,
                        const unsigned char* k) {
  state s;
  u64 tag[2];
  const unsigned char* tag_bytes = (const unsigned char*)tag;
  unsigned int i;
  (void)nsec;

  // set ciphertext size
  *clen = mlen + CRYPTO_ABYTES;
  ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENC);

  // set tag: x3 || x4 converted back from the bit-interleaved form
  tag[0] = from_bit_interleaving_big(s.x3);
  tag[1] = from_bit_interleaving_big(s.x4);
  for (i = 0; i < sizeof tag; ++i) {
    c[mlen + i] = tag_bytes[i];
  }
  return 0;
}
#ifndef ENDIAN_H_
#define ENDIAN_H_
/*
 * U64BIG / U32BIG / U16BIG map a value between host byte order and
 * big-endian byte order. On big-endian hosts they are the identity; on
 * little-endian hosts they reverse the bytes, so the same macro works in both
 * directions. The swapping forms evaluate their argument several times, so
 * do not pass expressions with side effects.
 * Any compiler defining _MSC_VER is treated as little-endian; any other
 * compiler must provide __BYTE_ORDER__ or the build stops with #error.
 */
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
// macros for big endian machines
#define U64BIG(x) (x)
#define U32BIG(x) (x)
#define U16BIG(x) (x)
#elif defined(_MSC_VER) || \
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
// macros for little endian machines
#define U64BIG(x) \
((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) | \
(((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) | \
(((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) | \
(((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56))
#define U32BIG(x) \
((((x)&0x000000FF) << 24) | (((x)&0x0000FF00) << 8) | \
(((x)&0x00FF0000) >> 8) | (((x)&0xFF000000) >> 24))
#define U16BIG(x) ((((x)&0x00FF) << 8) | (((x)&0xFF00) >> 8))
#else
#error "ascon byte order macros not defined in endian.h"
#endif
#endif // ENDIAN_H_
Christoph Dobraunig
Martin Schläffer
Ferdinand Bachmann
#include "endian.h"
#include "permutations.h"
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
/*
 * Function form of to_bit_interleaving_big_immediate: converts a 64-bit word
 * as loaded from a big-endian byte string into its even/odd bit halves.
 */
u32_2 to_bit_interleaving_big(u64 in) {
  u32_2 interleaved;

  to_bit_interleaving_big_immediate(interleaved, in);
  return interleaved;
}
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
/*
 * Function form of from_bit_interleaving_big_immediate: merges the even/odd
 * bit halves back into one 64-bit word and applies U64BIG to the result.
 */
u64 from_bit_interleaving_big(u32_2 in) {
  u64 word;

  from_bit_interleaving_big_immediate(word, in);
  return word;
}
/*
void P(state *p, u8 start_round) {
// implemented in asm
}
*/
#ifndef PERMUTATIONS_H_
#define PERMUTATIONS_H_
/* Fixed-size integer aliases; u32 assumes `unsigned int` is 32 bits wide. */
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned long long u64;
/* One 64-bit word in bit-interleaved form: `e` collects the even-indexed
 * bits, `o` the odd-indexed bits (see to_bit_interleaving_big_immediate). */
typedef struct {
u32 e;
u32 o;
} u32_2;
/* Permutation state: five bit-interleaved 64-bit words (40 bytes). The
 * assembly P accesses the members by byte offset 0..36 in this order. */
typedef struct {
u32_2 x0;
u32_2 x1;
u32_2 x2;
u32_2 x3;
u32_2 x4;
} state;
/* Rotate a 32-bit value right by n; n must be in 1..31 (n == 0 would shift
 * by 32). */
#define ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
/* Index of the first round to run when x of the 12 rounds are wanted. */
#define START_ROUND(x) (12 - (x))
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
/* Word (as loaded from big-endian bytes) -> bit-interleaved halves. */
u32_2 to_bit_interleaving_big(u64 in);
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
/* Bit-interleaved halves -> word ready to be stored as big-endian bytes. */
u64 from_bit_interleaving_big(u32_2 in);
/*
 * Statement macro: out = bit-interleaved form of U64BIG(in).
 * `out` must be a u32_2 lvalue (it is used as out.e / out.o); `in` is a
 * 64-bit value. Each 32-bit half is unshuffled in four swap steps so that its
 * even bits land in the low 16 bits and its odd bits in the high 16 bits;
 * the halves are then combined into out.e (even) and out.o (odd).
 * The expansion declares locals big_in, hi, lo, r0, r1 - arguments must not
 * use those names.
 */
#define to_bit_interleaving_big_immediate(out, in) \
do { \
u64 big_in = U64BIG(in); \
u32 hi = (big_in) >> 32; \
u32 lo = (u32)(big_in); \
u32 r0, r1; \
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); \
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); \
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); \
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); \
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); \
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); \
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); \
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); \
out.e = (lo & 0x0000FFFF) | (hi << 16); \
out.o = (lo >> 16) | (hi & 0xFFFF0000); \
} while (0)
/*
 * Statement macro, inverse of to_bit_interleaving_big_immediate:
 * out = U64BIG(word rebuilt from in.e / in.o).
 * `in` must be a u32_2 expression usable as in.e / in.o; `out` is a u64
 * lvalue. The four swap steps run in the opposite order (8, 4, 2, 1) to
 * shuffle even and odd bits back together.
 * The expansion declares locals lo, hi, r0, r1 - arguments must not use
 * those names.
 */
#define from_bit_interleaving_big_immediate(out, in) \
do { \
u32 lo = (in.e & 0x0000FFFF) | (in.o << 16); \
u32 hi = (in.e >> 16) | (in.o & 0xFFFF0000); \
u32 r0, r1; \
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); \
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); \
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); \
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); \
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); \
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); \
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); \
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); \
out = (u64)hi << 32 | lo; \
out = U64BIG(out); \
} while (0)
/* clang-format off */
/*
 * One permutation round on bit-interleaved words, as a statement macro.
 * Expects a `state s` and a scratch `u32_2 t0` in the enclosing scope.
 * C_e / C_o are the even / odd halves of the round constant, XORed into x2.
 * A 64-bit rotation by r is carried out as 32-bit rotations of the two
 * halves, which is why odd rotation amounts cross between .e and .o.
 * NOTE(review): appears unused by the C code next to it, since P is provided
 * in assembly for this variant - confirm before relying on it.
 */
#define ROUND(C_e, C_o) \
do { \
/* round constant */ \
s.x2.e ^= C_e; s.x2.o ^= C_o; \
/* s-box layer */ \
s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \
s.x4.e ^= s.x3.e; s.x4.o ^= s.x3.o; \
s.x2.e ^= s.x1.e; s.x2.o ^= s.x1.o; \
t0.e = s.x0.e & (~s.x4.e); t0.o = s.x0.o & (~s.x4.o); \
s.x0.e ^= s.x2.e & (~s.x1.e); s.x0.o ^= s.x2.o & (~s.x1.o); \
s.x2.e ^= s.x4.e & (~s.x3.e); s.x2.o ^= s.x4.o & (~s.x3.o); \
s.x4.e ^= s.x1.e & (~s.x0.e); s.x4.o ^= s.x1.o & (~s.x0.o); \
s.x1.e ^= s.x3.e & (~s.x2.e); s.x1.o ^= s.x3.o & (~s.x2.o); \
s.x3.e ^= t0.e; s.x3.o ^= t0.o; \
s.x1.e ^= s.x0.e; s.x1.o ^= s.x0.o; \
s.x3.e ^= s.x2.e; s.x3.o ^= s.x2.o; \
s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \
/* linear layer */ \
t0.e = s.x0.e ^ ROTR32(s.x0.o, 4); \
t0.o = s.x0.o ^ ROTR32(s.x0.e, 5); \
s.x0.e ^= ROTR32(t0.o, 9); \
s.x0.o ^= ROTR32(t0.e, 10); \
t0.e = s.x1.e ^ ROTR32(s.x1.e, 11); \
t0.o = s.x1.o ^ ROTR32(s.x1.o, 11); \
s.x1.e ^= ROTR32(t0.o, 19); \
s.x1.o ^= ROTR32(t0.e, 20); \
t0.e = s.x2.e ^ ROTR32(s.x2.o, 2); \
t0.o = s.x2.o ^ ROTR32(s.x2.e, 3); \
s.x2.e ^= t0.o; \
s.x2.o ^= ROTR32(t0.e, 1); \
t0.e = s.x3.e ^ ROTR32(s.x3.o, 3); \
t0.o = s.x3.o ^ ROTR32(s.x3.e, 4); \
s.x3.e ^= ROTR32(t0.e, 5); \
s.x3.o ^= ROTR32(t0.o, 5); \
t0.e = s.x4.e ^ ROTR32(s.x4.e, 17); \
t0.o = s.x4.o ^ ROTR32(s.x4.o, 17); \
s.x4.e ^= ROTR32(t0.o, 3); \
s.x4.o ^= ROTR32(t0.e, 4); \
s.x2.e = ~s.x2.e; s.x2.o = ~s.x2.o; \
} while(0)
/* clang-format on */
/*
 * Permute the state in place.
 * NOTE: despite the parameter name, the assembly implementation uses the
 * second argument as the index of the FIRST round to run (it indexes the
 * round-constant table with it and loops until the index reaches 12).
 * Callers therefore pass START_ROUND(n) to run n rounds.
 */
void P(state *p, u8 rounds);
#endif // PERMUTATIONS_H_
#include <xtensa/coreasm.h>
/*
 * Round-constant table: 12 entries of two bytes each, indexed by round
 * number. P XORs byte 0 of an entry into x2e and byte 1 into x2o.
 */
.section .data
.align 2
.global ascon_round_constants
.type ascon_round_constants,@object
ascon_round_constants:
.byte 0xc, 0xc
.byte 0x9, 0xc
.byte 0xc, 0x9
.byte 0x9, 0x9
.byte 0x6, 0xc
.byte 0x3, 0xc
.byte 0x6, 0x9
.byte 0x3, 0x9
.byte 0xc, 0x6
.byte 0x9, 0x6
.byte 0xc, 0x3
.byte 0x9, 0x3
/*
 * Register allocation for P: the ten 32-bit halves of the state live in
 * a4..a13 for the whole permutation.
 */
#define x0e a4
#define x0o a5
#define x1e a6
#define x1o a7
#define x2e a8
#define x2o a9
#define x3e a10
#define x3o a11
#define x4e a12
#define x4o a13
/* Scratch pair for the round constant and the linear layer. */
#define t0e a14
#define t0o a15
/* S-box scratch names; they alias t0e (a14) and t0o (a15) respectively. */
#define tmp a14
#define neg a15
/*
 * P: permutation on the bit-interleaved state.
 *
 * Register use on entry, as the code reads them:
 *   a2 - pointer to the 40-byte state (x0e, x0o, ..., x4e, x4o at 0..36)
 *   a3 - index of the first round to run; rounds a3 .. 11 are executed
 *
 * The loop condition is tested at the bottom, so at least one round always
 * runs; an index >= 12 would read past the round-constant table.
 * NOTE(review): a12..a15 are overwritten without being saved. That looks
 * intended for the windowed ABI; if this is ever built for the call0 ABI,
 * confirm those registers may be clobbered.
 */
.section .text
.align 4
.global P
.type P,@function
P:
abi_entry 4
/* load the whole state into registers */
l32i x0e, a2, 0
l32i x0o, a2, 4
l32i x1e, a2, 8
l32i x1o, a2, 12
l32i x2e, a2, 16
l32i x2o, a2, 20
l32i x3e, a2, 24
l32i x3o, a2, 28
l32i x4e, a2, 32
l32i x4o, a2, 36
/* park the state pointer on the stack; a2 becomes the constant pointer */
s32i a2, a1, 0
movi a2, ascon_round_constants
/* a2 = table + 2 * start index (two bytes per round) */
addx2 a2, a3, a2
.Lround_start:
/* round constant */
/* a14 / a15 are t0e / t0o, just loaded with the two constant bytes */
l8ui t0e, a2, 0
xor x2e, x2e, a14
l8ui t0o, a2, 1
xor x2o, x2o, a15
/* s-box layer */
/* even */
xor x0e, x0e, x4e
xor x4e, x4e, x3e
xor x2e, x2e, x1e
/* each "movi neg, -1; xor neg, neg, r" computes ~r */
/* tmp = x0 & ~x4, kept until it is XORed into x3 below */
movi neg, -1
xor neg, neg, x4e
and tmp, neg, x0e
/* x0 ^= x2 & ~x1 */
movi neg, -1
xor neg, neg, x1e
and neg, neg, x2e
xor x0e, x0e, neg
/* x2 ^= x4 & ~x3 */
movi neg, -1
xor neg, neg, x3e
and neg, neg, x4e
xor x2e, x2e, neg
/* x4 ^= x1 & ~x0 */
movi neg, -1
xor neg, neg, x0e
and neg, neg, x1e
xor x4e, x4e, neg
/* x1 ^= x3 & ~x2 */
movi neg, -1
xor neg, neg, x2e
and neg, neg, x3e
xor x1e, x1e, neg
xor x3e, x3e, tmp
xor x1e, x1e, x0e
xor x3e, x3e, x2e
xor x0e, x0e, x4e
/* odd */
/* same sequence as above on the odd halves */
xor x0o, x0o, x4o
xor x4o, x4o, x3o
xor x2o, x2o, x1o
movi neg, -1
xor neg, neg, x4o
and tmp, neg, x0o
movi neg, -1
xor neg, neg, x1o
and neg, neg, x2o
xor x0o, x0o, neg
movi neg, -1
xor neg, neg, x3o
and neg, neg, x4o
xor x2o, x2o, neg
movi neg, -1
xor neg, neg, x0o
and neg, neg, x1o
xor x4o, x4o, neg
movi neg, -1
xor neg, neg, x2o
and neg, neg, x3o
xor x1o, x1o, neg
xor x3o, x3o, tmp
xor x1o, x1o, x0o
xor x3o, x3o, x2o
xor x0o, x0o, x4o
/* linear layer */
/* "ssai n; src d, r, r" rotates r right by n bits into d */
/* x0: t0e = x0e ^ ror(x0o,4), t0o = x0o ^ ror(x0e,5) */
ssai 4
src t0e, x0o, x0o
xor t0e, t0e, x0e
ssai 5
src t0o, x0e, x0e
xor t0o, t0o, x0o
/* x0e ^= ror(t0o,9), x0o ^= ror(t0e,10) */
ssai 9
src t0o, t0o, t0o
xor x0e, x0e, t0o
ssai 10
src t0e, t0e, t0e
xor x0o, x0o, t0e
/* x1: t0e = x1e ^ ror(x1e,11), t0o = x1o ^ ror(x1o,11) */
ssai 11
src t0e, x1e, x1e
xor t0e, t0e, x1e
src t0o, x1o, x1o
xor t0o, t0o, x1o
/* x1e ^= ror(t0o,19), x1o ^= ror(t0e,20) */
ssai 19
src t0o, t0o, t0o
xor x1e, x1e, t0o
ssai 20
src t0e, t0e, t0e
xor x1o, x1o, t0e
/* x2: t0e = x2e ^ ror(x2o,2), t0o = x2o ^ ror(x2e,3) */
ssai 2
src t0e, x2o, x2o
xor t0e, t0e, x2e
ssai 3
src t0o, x2e, x2e
xor t0o, t0o, x2o
/* x2e ^= t0o (no rotation), x2o ^= ror(t0e,1) */
xor x2e, x2e, t0o
ssai 1
src t0e, t0e, t0e
xor x2o, x2o, t0e
/* x3: t0e = x3e ^ ror(x3o,3), t0o = x3o ^ ror(x3e,4) */
ssai 3
src t0e, x3o, x3o
xor t0e, t0e, x3e
ssai 4
src t0o, x3e, x3e
xor t0o, t0o, x3o
/* x3e ^= ror(t0e,5), x3o ^= ror(t0o,5) */
ssai 5
src t0e, t0e, t0e
xor x3e, x3e, t0e
src t0o, t0o, t0o
xor x3o, x3o, t0o
/* x4: t0e = x4e ^ ror(x4e,17), t0o = x4o ^ ror(x4o,17) */
ssai 17
src t0e, x4e, x4e
xor t0e, t0e, x4e
src t0o, x4o, x4o
xor t0o, t0o, x4o
/* x4e ^= ror(t0o,3), x4o ^= ror(t0e,4) */
ssai 3
src t0o, t0o, t0o
xor x4e, x4e, t0o
ssai 4
src t0e, t0e, t0e
xor x4o, x4o, t0e
/* x2 = ~x2 */
movi neg, -1
xor x2e, x2e, neg
xor x2o, x2o, neg
/* loop */
/* next constant pair, next round index; stop once the index reaches 12 */
addi a2, a2, 2
addi a3, a3, 1
bltui a3, 12, .Lround_start
.Lend:
/* reload the state pointer and write the registers back */
l32i a2, a1, 0
s32i x0e, a2, 0
s32i x0o, a2, 4
s32i x1e, a2, 8
s32i x1o, a2, 12
s32i x2e, a2, 16
s32i x2o, a2, 20
s32i x3e, a2, 24
s32i x3o, a2, 28
s32i x4e, a2, 32
s32i x4o, a2, 36
abi_return
/* AEAD parameter sizes, in bytes. */
/* Key length; also enters the IV as a bit count (8 * CRYPTO_KEYBYTES). */
#define CRYPTO_KEYBYTES 16
/* No secret message number: the nsec arguments are ignored by encrypt/decrypt. */
#define CRYPTO_NSECBYTES 0
/* Public nonce length; ascon_core loads it as two 64-bit words. */
#define CRYPTO_NPUBBYTES 16
/* Tag length appended to the ciphertext (clen = mlen + CRYPTO_ABYTES). */
#define CRYPTO_ABYTES 16
/* NOTE(review): not referenced by the code in this file; presumably tells the
 * surrounding framework that input and output buffers do not overlap - confirm. */
#define CRYPTO_NOOVERLAP 1
#include "core.h"
/*
 * Absorb `len` bytes from `in` into the rate words (x0, x1) of the
 * bit-interleaved state `s`.
 *
 * Input is consumed in RATE-byte blocks; after every full block the state is
 * permuted with P(s, PB_ROUNDS). The remaining len % RATE bytes (possibly
 * none) are followed by a single 0x80 byte and zero padding and are absorbed
 * without a trailing permutation.
 *
 * mode selects what happens in addition to absorbing:
 *   ASCON_AD  - nothing; `out` is never dereferenced or advanced (callers
 *               pass a null pointer).
 *   ASCON_ENC - the rate after the XOR is written to `out`.
 *   ASCON_DEC - the rate after the XOR is written to `out`, then the rate
 *               bytes covered by input are replaced by the input itself.
 *
 * Every byte is moved individually (big-endian word order via INS_BYTE64 /
 * EXT_BYTE64), so `in` and `out` need no particular alignment. Each block is
 * fully read before anything is written, so `out` may be the same buffer as
 * `in`.
 */
void process_data(state* s, unsigned char* out, const unsigned char* in,
                  unsigned long long len, u8 mode) {
  u32_2 t0, t1;
  u64 in0, in1; /* input block as two big-endian words */
  u64 w0, w1;   /* de-interleaved rate words after absorbing */
  u64 i;

  while (len >= RATE) {
    in0 = 0;
    in1 = 0;
    for (i = 0; i < 8; ++i) {
      in0 |= INS_BYTE64(in[i], i);
      in1 |= INS_BYTE64(in[8 + i], i);
    }
    t0 = to_bit_interleaving(in0);
    s->x0.e ^= t0.e;
    s->x0.o ^= t0.o;
    t1 = to_bit_interleaving(in1);
    s->x1.e ^= t1.e;
    s->x1.o ^= t1.o;
    if (mode != ASCON_AD) {
      w0 = from_bit_interleaving(s->x0);
      w1 = from_bit_interleaving(s->x1);
      for (i = 0; i < 8; ++i) {
        out[i] = EXT_BYTE64(w0, i);
        out[8 + i] = EXT_BYTE64(w1, i);
      }
      out += RATE; /* only advanced when it is a real buffer */
    }
    if (mode == ASCON_DEC) {
      /* the rate becomes the ciphertext block itself */
      s->x0 = t0;
      s->x1 = t1;
    }
    P(s, PB_ROUNDS);
    in += RATE;
    len -= RATE;
  }

  /* final block: len < RATE input bytes, then 0x80, then zeros */
  in0 = 0;
  in1 = 0;
  for (i = 0; i < len; ++i) {
    if (i < 8) {
      in0 |= INS_BYTE64(in[i], i);
    } else {
      in1 |= INS_BYTE64(in[i], i - 8);
    }
  }
  if (len < 8) {
    in0 |= INS_BYTE64(0x80, len);
  } else {
    in1 |= INS_BYTE64(0x80, len - 8);
  }

  t0 = to_bit_interleaving(in0);
  s->x0.e ^= t0.e;
  s->x0.o ^= t0.o;
  t1 = to_bit_interleaving(in1);
  s->x1.e ^= t1.e;
  s->x1.o ^= t1.o;

  if (mode != ASCON_AD) {
    w0 = from_bit_interleaving(s->x0);
    w1 = from_bit_interleaving(s->x1);
    for (i = 0; i < len; ++i) {
      if (i < 8) {
        out[i] = EXT_BYTE64(w0, i);
      } else {
        out[i] = EXT_BYTE64(w1, i - 8);
      }
    }
    if (mode == ASCON_DEC) {
      /* overwrite the first len rate bytes with the ciphertext bytes, taken
       * from in0/in1 so a write to `out` cannot have disturbed them */
      for (i = 0; i < len; ++i) {
        if (i < 8) {
          const u64 byte_mask = INS_BYTE64(0xff, i);
          w0 = (w0 & ~byte_mask) | (in0 & byte_mask);
        } else {
          const u64 byte_mask = INS_BYTE64(0xff, i - 8);
          w1 = (w1 & ~byte_mask) | (in1 & byte_mask);
        }
      }
      s->x0 = to_bit_interleaving(w0);
      s->x1 = to_bit_interleaving(w1);
    }
  }
}
/*
 * Run the keyed sponge over associated data and message/ciphertext, leaving
 * the result in `s`; callers read the tag from s->x3 and s->x4.
 *
 *   s     - state to initialise and update (bit-interleaved words)
 *   out   - receives tlen bytes of ciphertext (ASCON_ENC) or plaintext
 *           (ASCON_DEC)
 *   in    - tlen bytes of plaintext / ciphertext
 *   ad    - adlen bytes of associated data (skipped entirely when adlen == 0)
 *   npub  - 16-byte nonce
 *   k     - 16-byte key
 *   mode  - ASCON_ENC or ASCON_DEC, forwarded to process_data
 *
 * Key and nonce are assembled byte by byte as big-endian words, so `k` and
 * `npub` need no particular alignment.
 */
void ascon_core(state* s, unsigned char* out, const unsigned char* in,
                unsigned long long tlen, const unsigned char* ad,
                unsigned long long adlen, const unsigned char* npub,
                const unsigned char* k, u8 mode) {
  u32_2 K0, K1, N0, N1;
  u64 key_hi = 0, key_lo = 0, nonce_hi = 0, nonce_lo = 0;
  unsigned int i;

  // load key and nonce (byte 0 is the most significant byte of each word)
  for (i = 0; i < 8; ++i) {
    const unsigned int shift = 56 - 8 * i;
    key_hi |= (u64)k[i] << shift;
    key_lo |= (u64)k[8 + i] << shift;
    nonce_hi |= (u64)npub[i] << shift;
    nonce_lo |= (u64)npub[8 + i] << shift;
  }
  K0 = to_bit_interleaving(key_hi);
  K1 = to_bit_interleaving(key_lo);
  N0 = to_bit_interleaving(nonce_hi);
  N1 = to_bit_interleaving(nonce_lo);

  // initialization: x0 = IV, x1..x2 = key, x3..x4 = nonce
  s->x0 = to_bit_interleaving(IV);
  s->x1.o = K0.o;
  s->x1.e = K0.e;
  s->x2.e = K1.e;
  s->x2.o = K1.o;
  s->x3.e = N0.e;
  s->x3.o = N0.o;
  s->x4.e = N1.e;
  s->x4.o = N1.o;
  P(s, PA_ROUNDS);
  s->x3.e ^= K0.e;
  s->x3.o ^= K0.o;
  s->x4.e ^= K1.e;
  s->x4.o ^= K1.o;

  // process associated data
  if (adlen) {
    process_data(s, (void*)0, ad, adlen, ASCON_AD);
    P(s, PB_ROUNDS);
  }
  // flip one bit of x4 between the AD phase and the text phase
  s->x4.e ^= 1;

  // process plaintext/ciphertext
  process_data(s, out, in, tlen, mode);

  // finalization: key into x2..x3, full permutation, key into x3..x4
  s->x2.e ^= K0.e;
  s->x2.o ^= K0.o;
  s->x3.e ^= K1.e;
  s->x3.o ^= K1.o;
  P(s, PA_ROUNDS);
  s->x3.e ^= K0.e;
  s->x3.o ^= K0.o;
  s->x4.e ^= K1.e;
  s->x4.o ^= K1.o;
}
#ifndef CORE_H_
#define CORE_H_
#include "api.h"
#include "endian.h"
#include "permutations.h"
/* Values of the `mode` argument of process_data / ascon_core. */
#define ASCON_AD 0
#define ASCON_ENC 1
#define ASCON_DEC 2
/* Bytes absorbed per block (128 bits). */
#define RATE (128 / 8)
/* Number of permutation rounds passed to P. */
#define PA_ROUNDS 12
#define PB_ROUNDS 8
/*
 * Initial value of x0: key bits, rate bits, PA_ROUNDS and PB_ROUNDS packed
 * into the four most significant bytes; the low 32 bits are zero.
 * ascon_core converts it with to_bit_interleaving (no byte swap).
 */
#define IV \
((u64)(8 * (CRYPTO_KEYBYTES)) << 56 | (u64)(8 * (RATE)) << 48 | \
(u64)(PA_ROUNDS) << 40 | (u64)(PB_ROUNDS) << 32)
/* Absorb len bytes of `in` into the rate of `s`; writes to `out` unless mode
 * is ASCON_AD. */
void process_data(state* s, unsigned char* out, const unsigned char* in,
unsigned long long len, u8 mode);
/* Initialise `s` from key and nonce, process ad and in, and finalise; the tag
 * is left in s->x3 / s->x4. */
void ascon_core(state* s, unsigned char* out, const unsigned char* in,
unsigned long long tlen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k, u8 mode);
#endif // CORE_H_
#include "core.h"
int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen,
unsigned char* nsec, const unsigned char* c,
unsigned long long clen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k) {
if (clen < CRYPTO_ABYTES) {
*mlen = 0;
return -1;
}
state s;
u32_2 t0, t1;
(void)nsec;
// set plaintext size
*mlen = clen - CRYPTO_ABYTES;
ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DEC);
// verify tag (should be constant time, check compiler output)
t0 = to_bit_interleaving_big((*(u64*)(c + *mlen)));
t1 = to_bit_interleaving_big((*(u64*)(c + *mlen + 8)));
if (((s.x3.e ^ t0.e) | (s.x3.o ^ t0.o) | (s.x4.e ^ t1.e) | (s.x4.o ^ t1.o)) !=
0) {
*mlen = 0;
return -1;
}
return 0;
}
#include "core.h"
/*
 * Encrypt mlen bytes at `m` into `c` and append the CRYPTO_ABYTES tag.
 *
 * Sets *clen = mlen + CRYPTO_ABYTES and always returns 0. `c` must have room
 * for *clen bytes. `nsec` is unused.
 *
 * The tag is written byte-wise from local words, so c + mlen may have any
 * alignment.
 */
int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen,
                        const unsigned char* m, unsigned long long mlen,
                        const unsigned char* ad, unsigned long long adlen,
                        const unsigned char* nsec, const unsigned char* npub,
                        const unsigned char* k) {
  state s;
  u64 tag[2];
  const unsigned char* tag_bytes = (const unsigned char*)tag;
  unsigned int i;
  (void)nsec;

  // set ciphertext size
  *clen = mlen + CRYPTO_ABYTES;
  ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENC);

  // set tag: x3 || x4 converted back from the bit-interleaved form
  tag[0] = from_bit_interleaving_big(s.x3);
  tag[1] = from_bit_interleaving_big(s.x4);
  for (i = 0; i < sizeof tag; ++i) {
    c[mlen + i] = tag_bytes[i];
  }
  return 0;
}
#ifndef ENDIAN_H_
#define ENDIAN_H_
/*
 * U64BIG / U32BIG / U16BIG map a value between host byte order and
 * big-endian byte order. On big-endian hosts they are the identity; on
 * little-endian hosts they reverse the bytes, so the same macro works in both
 * directions. The swapping forms evaluate their argument several times, so
 * do not pass expressions with side effects.
 * Any compiler defining _MSC_VER is treated as little-endian; any other
 * compiler must provide __BYTE_ORDER__ or the build stops with #error.
 */
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
// macros for big endian machines
#define U64BIG(x) (x)
#define U32BIG(x) (x)
#define U16BIG(x) (x)
#elif defined(_MSC_VER) || \
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
// macros for little endian machines
#define U64BIG(x) \
((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) | \
(((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) | \
(((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) | \
(((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56))
#define U32BIG(x) \
((((x)&0x000000FF) << 24) | (((x)&0x0000FF00) << 8) | \
(((x)&0x00FF0000) >> 8) | (((x)&0xFF000000) >> 24))
#define U16BIG(x) ((((x)&0x00FF) << 8) | (((x)&0xFF00) >> 8))
#else
#error "ascon byte order macros not defined in endian.h"
#endif
#endif // ENDIAN_H_
Christoph Dobraunig
Martin Schläffer
Ferdinand Bachmann
#include "endian.h"
#include "permutations.h"
/* Round constants, one {even, odd} byte pair per round 0..11; P XORs entry
 * [i][0] into x2.e and entry [i][1] into x2.o via the ROUND macro. */
static const u8 constants[][2] = {
{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9}, {0x6, 0xc}, {0x3, 0xc},
{0x6, 0x9}, {0x3, 0x9}, {0xc, 0x6}, {0x9, 0x6}, {0xc, 0x3}, {0x9, 0x3}};
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
/*
 * Split a 64-bit word into its even-indexed bits (result .e) and odd-indexed
 * bits (result .o). Each 32-bit half is unshuffled with four swap steps
 * (distances 1, 2, 4, 8), which gathers its even bits in the low 16 bits and
 * its odd bits in the high 16 bits; the halves are then merged.
 */
u32_2 to_bit_interleaving(u64 in) {
  u32 upper = (u32)(in >> 32);
  u32 lower = (u32)in;
  u32 swap;
  u32_2 result;

  /* unshuffle the low 32 bits */
  swap = (lower ^ (lower >> 1)) & 0x22222222;
  lower ^= swap ^ (swap << 1);
  swap = (lower ^ (lower >> 2)) & 0x0C0C0C0C;
  lower ^= swap ^ (swap << 2);
  swap = (lower ^ (lower >> 4)) & 0x00F000F0;
  lower ^= swap ^ (swap << 4);
  swap = (lower ^ (lower >> 8)) & 0x0000FF00;
  lower ^= swap ^ (swap << 8);

  /* unshuffle the high 32 bits */
  swap = (upper ^ (upper >> 1)) & 0x22222222;
  upper ^= swap ^ (swap << 1);
  swap = (upper ^ (upper >> 2)) & 0x0C0C0C0C;
  upper ^= swap ^ (swap << 2);
  swap = (upper ^ (upper >> 4)) & 0x00F000F0;
  upper ^= swap ^ (swap << 4);
  swap = (upper ^ (upper >> 8)) & 0x0000FF00;
  upper ^= swap ^ (swap << 8);

  /* even bits of both halves -> e, odd bits of both halves -> o */
  result.e = (lower & 0x0000FFFF) | (upper << 16);
  result.o = (lower >> 16) | (upper & 0xFFFF0000);
  return result;
}
/*
 * Bit-interleave a word that was loaded in host order from a big-endian byte
 * string: U64BIG first turns it into the big-endian numeric value.
 */
u32_2 to_bit_interleaving_big(u64 in) {
  const u64 be_value = U64BIG(in);

  return to_bit_interleaving(be_value);
}
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
/*
 * Inverse of to_bit_interleaving: rebuild the 64-bit word from its even-bit
 * half (in.e) and odd-bit half (in.o). The halves are first regrouped per
 * 32-bit output word, then shuffled back with the swap steps in reverse
 * order (distances 8, 4, 2, 1).
 */
u64 from_bit_interleaving(u32_2 in) {
  u32 lower = (in.e & 0x0000FFFF) | (in.o << 16);
  u32 upper = (in.e >> 16) | (in.o & 0xFFFF0000);
  u32 swap;

  /* shuffle the low 32 bits */
  swap = (lower ^ (lower >> 8)) & 0x0000FF00;
  lower ^= swap ^ (swap << 8);
  swap = (lower ^ (lower >> 4)) & 0x00F000F0;
  lower ^= swap ^ (swap << 4);
  swap = (lower ^ (lower >> 2)) & 0x0C0C0C0C;
  lower ^= swap ^ (swap << 2);
  swap = (lower ^ (lower >> 1)) & 0x22222222;
  lower ^= swap ^ (swap << 1);

  /* shuffle the high 32 bits */
  swap = (upper ^ (upper >> 8)) & 0x0000FF00;
  upper ^= swap ^ (swap << 8);
  swap = (upper ^ (upper >> 4)) & 0x00F000F0;
  upper ^= swap ^ (swap << 4);
  swap = (upper ^ (upper >> 2)) & 0x0C0C0C0C;
  upper ^= swap ^ (swap << 2);
  swap = (upper ^ (upper >> 1)) & 0x22222222;
  upper ^= swap ^ (swap << 1);

  return (u64)upper << 32 | lower;
}
/*
 * De-interleave and convert with U64BIG, so that storing the result in host
 * order yields the word's big-endian byte string.
 */
u64 from_bit_interleaving_big(u32_2 in) {
  const u64 be_value = from_bit_interleaving(in);

  return U64BIG(be_value);
}
/*
 * Apply the last `rounds` of the 12 permutation rounds to *p, i.e. rounds
 * START_ROUND(rounds) .. 11. Works on a local copy and writes it back once.
 */
void P(state *p, u8 rounds) {
  /* ROUND expects these exact names: `s` for the state, t0..t4 as scratch */
  u32_2 t0, t1, t2, t3, t4;
  state s = *p;
  u32 round = START_ROUND(rounds);

  while (round < 12) {
    ROUND(constants[round][0], constants[round][1]);
    round++;
  }
  *p = s;
}
#ifndef PERMUTATIONS_H_
#define PERMUTATIONS_H_
/* Fixed-size integer aliases; u32 assumes `unsigned int` is 32 bits wide. */
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned long long u64;
/* One 64-bit word in bit-interleaved form: `e` collects the even-indexed
 * bits, `o` the odd-indexed bits (see to_bit_interleaving). */
typedef struct {
u32 e;
u32 o;
} u32_2;
/* Permutation state: five bit-interleaved 64-bit words. */
typedef struct {
u32_2 x0;
u32_2 x1;
u32_2 x2;
u32_2 x3;
u32_2 x4;
} state;
/* Byte n of 64-bit x, where n = 0 is the most significant byte. */
#define EXT_BYTE64(x, n) ((u8)((u64)(x) >> (8 * (7 - (n)))))
/* Byte value x moved to byte position n (n = 0 is the most significant). */
#define INS_BYTE64(x, n) ((u64)(x) << (8 * (7 - (n))))
/* Rotate a 32-bit value right by n; n must be in 1..31 (n == 0 would shift
 * by 32). */
#define ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
/* Index of the first round to run when x of the 12 rounds are wanted. */
#define START_ROUND(x) (12 - (x))
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
/* 64-bit value -> even/odd bit halves. */
u32_2 to_bit_interleaving(u64 in);
/* Same, after converting `in` with U64BIG. */
u32_2 to_bit_interleaving_big(u64 in);
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
/* Even/odd bit halves -> 64-bit value. */
u64 from_bit_interleaving(u32_2 in);
/* Same, with the result converted by U64BIG. */
u64 from_bit_interleaving_big(u32_2 in);
/* clang-format off */
/*
 * One permutation round on bit-interleaved words, as a statement macro.
 * Expects a `state s` and scratch `u32_2 t0, t1, t2, t3, t4` in the enclosing
 * scope. C_e / C_o are the even / odd halves of the round constant, XORed
 * into x2. The s-box layer snapshots all five words into t0..t4 first, so
 * each output is computed from pre-update values. A 64-bit rotation by r is
 * carried out as 32-bit rotations of the two halves, which is why odd
 * rotation amounts cross between .e and .o.
 */
#define ROUND(C_e, C_o) \
do { \
/* round constant */ \
s.x2.e ^= C_e; s.x2.o ^= C_o; \
/* s-box layer */ \
s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \
s.x4.e ^= s.x3.e; s.x4.o ^= s.x3.o; \
s.x2.e ^= s.x1.e; s.x2.o ^= s.x1.o; \
t0.e = s.x0.e; t0.o = s.x0.o; \
t4.e = s.x4.e; t4.o = s.x4.o; \
t3.e = s.x3.e; t3.o = s.x3.o; \
t1.e = s.x1.e; t1.o = s.x1.o; \
t2.e = s.x2.e; t2.o = s.x2.o; \
s.x0.e = t0.e ^ (~t1.e & t2.e); s.x0.o = t0.o ^ (~t1.o & t2.o); \
s.x2.e = t2.e ^ (~t3.e & t4.e); s.x2.o = t2.o ^ (~t3.o & t4.o); \
s.x4.e = t4.e ^ (~t0.e & t1.e); s.x4.o = t4.o ^ (~t0.o & t1.o); \
s.x1.e = t1.e ^ (~t2.e & t3.e); s.x1.o = t1.o ^ (~t2.o & t3.o); \
s.x3.e = t3.e ^ (~t4.e & t0.e); s.x3.o = t3.o ^ (~t4.o & t0.o); \
s.x1.e ^= s.x0.e; s.x1.o ^= s.x0.o; \
s.x3.e ^= s.x2.e; s.x3.o ^= s.x2.o; \
s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \
/* linear layer */ \
t0.e = s.x0.e ^ ROTR32(s.x0.o, 4); t0.o = s.x0.o ^ ROTR32(s.x0.e, 5); \
t1.e = s.x1.e ^ ROTR32(s.x1.e, 11); t1.o = s.x1.o ^ ROTR32(s.x1.o, 11); \
t2.e = s.x2.e ^ ROTR32(s.x2.o, 2); t2.o = s.x2.o ^ ROTR32(s.x2.e, 3); \
t3.e = s.x3.e ^ ROTR32(s.x3.o, 3); t3.o = s.x3.o ^ ROTR32(s.x3.e, 4); \
t4.e = s.x4.e ^ ROTR32(s.x4.e, 17); t4.o = s.x4.o ^ ROTR32(s.x4.o, 17); \
s.x0.e ^= ROTR32(t0.o, 9); s.x0.o ^= ROTR32(t0.e, 10); \
s.x1.e ^= ROTR32(t1.o, 19); s.x1.o ^= ROTR32(t1.e, 20); \
s.x2.e ^= t2.o; s.x2.o ^= ROTR32(t2.e, 1); \
s.x3.e ^= ROTR32(t3.e, 5); s.x3.o ^= ROTR32(t3.o, 5); \
s.x4.e ^= ROTR32(t4.o, 3); s.x4.o ^= ROTR32(t4.e, 4); \
s.x2.e = ~s.x2.e; s.x2.o = ~s.x2.o; \
} while(0)
/* clang-format on */
/* Permute *p in place with the last `rounds` of the 12 rounds
 * (rounds START_ROUND(rounds) .. 11). */
void P(state *p, u8 rounds);
#endif // PERMUTATIONS_H_
/* AEAD parameter sizes, in bytes. */
/* Key length; also enters the IV as a bit count (8 * CRYPTO_KEYBYTES). */
#define CRYPTO_KEYBYTES 16
/* No secret message number: the nsec arguments are ignored by encrypt/decrypt. */
#define CRYPTO_NSECBYTES 0
/* Public nonce length; ascon_core loads it as two 64-bit words. */
#define CRYPTO_NPUBBYTES 16
/* Tag length appended to the ciphertext (clen = mlen + CRYPTO_ABYTES). */
#define CRYPTO_ABYTES 16
/* NOTE(review): not referenced by the code in this file; presumably tells the
 * surrounding framework that input and output buffers do not overlap - confirm. */
#define CRYPTO_NOOVERLAP 1
#include <string.h>
#include "core.h"
/*
 * Absorb `len` bytes from `in` into the rate words (x0, x1) of the
 * bit-interleaved state `s`.
 *
 * Input is consumed in RATE-byte blocks; after every full block the state is
 * permuted with P(s, PB_ROUNDS). The remaining len % RATE bytes (possibly
 * none) are followed by a single 0x80 byte and zero padding and are absorbed
 * without a trailing permutation.
 *
 * mode selects what happens in addition to absorbing:
 *   ASCON_AD  - nothing; `out` is never dereferenced or advanced (callers
 *               pass a null pointer).
 *   ASCON_ENC - the rate after the XOR is written to `out`.
 *   ASCON_DEC - the rate after the XOR is written to `out`, then the rate
 *               bytes covered by input are replaced by the input itself.
 *
 * All buffer traffic goes through local u64 staging words via memcpy, so
 * `in` and `out` need no particular alignment and no u64 lvalue is ever
 * formed over caller memory. Because the input block is copied before
 * anything is written, `out` may be the same buffer as `in`.
 */
void process_data(state* s, unsigned char* out, const unsigned char* in,
                  unsigned long long len, u8 mode) {
  u32_2 t0, t1;
  u64 block[2]; /* current input block, exactly RATE (16) bytes */
  u64 rate[2];  /* de-interleaved rate words after absorbing the block */

  while (len >= RATE) {
    memcpy(block, in, RATE);
    t0 = to_bit_interleaving_big(block[0]);
    s->x0.e ^= t0.e;
    s->x0.o ^= t0.o;
    t1 = to_bit_interleaving_big(block[1]);
    s->x1.e ^= t1.e;
    s->x1.o ^= t1.o;
    if (mode != ASCON_AD) {
      rate[0] = from_bit_interleaving_big(s->x0);
      rate[1] = from_bit_interleaving_big(s->x1);
      memcpy(out, rate, RATE);
      out += RATE; /* only advanced when it is a real buffer */
    }
    if (mode == ASCON_DEC) {
      /* the rate becomes the ciphertext block itself */
      s->x0 = t0;
      s->x1 = t1;
    }
    P(s, PB_ROUNDS);
    in += RATE;
    len -= RATE;
  }

  /* final block: len < RATE input bytes, then 0x80, then zeros */
  block[0] = 0;
  block[1] = 0;
  if (len) {
    memcpy(block, in, len);
  }
  ((u8*)block)[len] = 0x80;

  t0 = to_bit_interleaving_big(block[0]);
  s->x0.e ^= t0.e;
  s->x0.o ^= t0.o;
  t1 = to_bit_interleaving_big(block[1]);
  s->x1.e ^= t1.e;
  s->x1.o ^= t1.o;

  if (mode != ASCON_AD) {
    rate[0] = from_bit_interleaving_big(s->x0);
    rate[1] = from_bit_interleaving_big(s->x1);
    if (len) {
      memcpy(out, rate, len);
    }
    if (mode == ASCON_DEC) {
      /* overwrite the first len rate bytes with the ciphertext, taken from
       * the private copy so a write to `out` cannot have disturbed it */
      memcpy(rate, block, len);
      s->x0 = to_bit_interleaving_big(rate[0]);
      s->x1 = to_bit_interleaving_big(rate[1]);
    }
  }
}
/*
 * Run the keyed sponge over associated data and message/ciphertext, leaving
 * the result in `s`; callers read the tag from s->x3 and s->x4.
 *
 *   s     - state to initialise and update (bit-interleaved words)
 *   out   - receives tlen bytes of ciphertext (ASCON_ENC) or plaintext
 *           (ASCON_DEC)
 *   in    - tlen bytes of plaintext / ciphertext
 *   ad    - adlen bytes of associated data (skipped entirely when adlen == 0)
 *   npub  - 16-byte nonce
 *   k     - 16-byte key
 *   mode  - ASCON_ENC or ASCON_DEC, forwarded to process_data
 *
 * Key and nonce are copied into local words with memcpy, so `k` and `npub`
 * need no particular alignment.
 */
void ascon_core(state* s, unsigned char* out, const unsigned char* in,
                unsigned long long tlen, const unsigned char* ad,
                unsigned long long adlen, const unsigned char* npub,
                const unsigned char* k, u8 mode) {
  u32_2 K0, K1, N0, N1;
  u64 key_words[2];
  u64 nonce_words[2];

  // load key and nonce
  memcpy(key_words, k, sizeof key_words);
  memcpy(nonce_words, npub, sizeof nonce_words);
  K0 = to_bit_interleaving_big(key_words[0]);
  K1 = to_bit_interleaving_big(key_words[1]);
  N0 = to_bit_interleaving_big(nonce_words[0]);
  N1 = to_bit_interleaving_big(nonce_words[1]);

  // initialization: x0 = IV, x1..x2 = key, x3..x4 = nonce
  s->x0 = to_bit_interleaving(IV);
  s->x1.o = K0.o;
  s->x1.e = K0.e;
  s->x2.e = K1.e;
  s->x2.o = K1.o;
  s->x3.e = N0.e;
  s->x3.o = N0.o;
  s->x4.e = N1.e;
  s->x4.o = N1.o;
  P(s, PA_ROUNDS);
  s->x3.e ^= K0.e;
  s->x3.o ^= K0.o;
  s->x4.e ^= K1.e;
  s->x4.o ^= K1.o;

  // process associated data
  if (adlen) {
    process_data(s, (void*)0, ad, adlen, ASCON_AD);
    P(s, PB_ROUNDS);
  }
  // flip one bit of x4 between the AD phase and the text phase
  s->x4.e ^= 1;

  // process plaintext/ciphertext
  process_data(s, out, in, tlen, mode);

  // finalization: key into x2..x3, full permutation, key into x3..x4
  s->x2.e ^= K0.e;
  s->x2.o ^= K0.o;
  s->x3.e ^= K1.e;
  s->x3.o ^= K1.o;
  P(s, PA_ROUNDS);
  s->x3.e ^= K0.e;
  s->x3.o ^= K0.o;
  s->x4.e ^= K1.e;
  s->x4.o ^= K1.o;
}
#ifndef CORE_H_
#define CORE_H_
#include "api.h"
#include "endian.h"
#include "permutations.h"
/* Values of the `mode` argument of process_data / ascon_core. */
#define ASCON_AD 0
#define ASCON_ENC 1
#define ASCON_DEC 2
/* Bytes absorbed per block (128 bits). */
#define RATE (128 / 8)
/* Number of permutation rounds passed to P. */
#define PA_ROUNDS 12
#define PB_ROUNDS 8
/*
 * Initial value of x0: key bits, rate bits, PA_ROUNDS and PB_ROUNDS packed
 * into the four most significant bytes; the low 32 bits are zero.
 * ascon_core converts it with to_bit_interleaving (no byte swap).
 */
#define IV \
((u64)(8 * (CRYPTO_KEYBYTES)) << 56 | (u64)(8 * (RATE)) << 48 | \
(u64)(PA_ROUNDS) << 40 | (u64)(PB_ROUNDS) << 32)
/* Absorb len bytes of `in` into the rate of `s`; writes to `out` unless mode
 * is ASCON_AD. */
void process_data(state* s, unsigned char* out, const unsigned char* in,
unsigned long long len, u8 mode);
/* Initialise `s` from key and nonce, process ad and in, and finalise; the tag
 * is left in s->x3 / s->x4. */
void ascon_core(state* s, unsigned char* out, const unsigned char* in,
unsigned long long tlen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k, u8 mode);
#endif // CORE_H_
#include "core.h"
int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen,
unsigned char* nsec, const unsigned char* c,
unsigned long long clen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k) {
if (clen < CRYPTO_ABYTES) {
*mlen = 0;
return -1;
}
state s;
u32_2 t0, t1;
(void)nsec;
// set plaintext size
*mlen = clen - CRYPTO_ABYTES;
ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DEC);
// verify tag (should be constant time, check compiler output)
t0 = to_bit_interleaving_big((*(u64*)(c + *mlen)));
t1 = to_bit_interleaving_big((*(u64*)(c + *mlen + 8)));
if (((s.x3.e ^ t0.e) | (s.x3.o ^ t0.o) | (s.x4.e ^ t1.e) | (s.x4.o ^ t1.o)) !=
0) {
*mlen = 0;
return -1;
}
return 0;
}
#include "core.h"
/*
 * Encrypt mlen bytes at `m` into `c` and append the CRYPTO_ABYTES tag.
 *
 * Sets *clen = mlen + CRYPTO_ABYTES and always returns 0. `c` must have room
 * for *clen bytes. `nsec` is unused.
 *
 * The tag is written byte-wise from local words, so c + mlen may have any
 * alignment.
 */
int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen,
                        const unsigned char* m, unsigned long long mlen,
                        const unsigned char* ad, unsigned long long adlen,
                        const unsigned char* nsec, const unsigned char* npub,
                        const unsigned char* k) {
  state s;
  u64 tag[2];
  const unsigned char* tag_bytes = (const unsigned char*)tag;
  unsigned int i;
  (void)nsec;

  // set ciphertext size
  *clen = mlen + CRYPTO_ABYTES;
  ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENC);

  // set tag: x3 || x4 converted back from the bit-interleaved form
  tag[0] = from_bit_interleaving_big(s.x3);
  tag[1] = from_bit_interleaving_big(s.x4);
  for (i = 0; i < sizeof tag; ++i) {
    c[mlen + i] = tag_bytes[i];
  }
  return 0;
}
#ifndef ENDIAN_H_
#define ENDIAN_H_
/*
 * U64BIG / U32BIG / U16BIG map a value between host byte order and
 * big-endian byte order. On big-endian hosts they are the identity; on
 * little-endian hosts they reverse the bytes, so the same macro works in both
 * directions. The swapping forms evaluate their argument several times, so
 * do not pass expressions with side effects.
 * Any compiler defining _MSC_VER is treated as little-endian; any other
 * compiler must provide __BYTE_ORDER__ or the build stops with #error.
 */
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
// macros for big endian machines
#define U64BIG(x) (x)
#define U32BIG(x) (x)
#define U16BIG(x) (x)
#elif defined(_MSC_VER) || \
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
// macros for little endian machines
#define U64BIG(x) \
((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) | \
(((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) | \
(((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) | \
(((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56))
#define U32BIG(x) \
((((x)&0x000000FF) << 24) | (((x)&0x0000FF00) << 8) | \
(((x)&0x00FF0000) >> 8) | (((x)&0xFF000000) >> 24))
#define U16BIG(x) ((((x)&0x00FF) << 8) | (((x)&0xFF00) >> 8))
#else
#error "ascon byte order macros not defined in endian.h"
#endif
#endif // ENDIAN_H_
Christoph Dobraunig
Martin Schläffer
Ferdinand Bachmann
#include "endian.h"
#include "permutations.h"
/* Round constants, one {even, odd} byte pair per round 0..11; P XORs entry
 * [i][0] into x2.e and entry [i][1] into x2.o via the ROUND macro. */
static const u8 constants[][2] = {
{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9}, {0x6, 0xc}, {0x3, 0xc},
{0x6, 0x9}, {0x3, 0x9}, {0xc, 0x6}, {0x9, 0x6}, {0xc, 0x3}, {0x9, 0x3}};
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
/*
 * Split a 64-bit word into its even-indexed bits (result .e) and odd-indexed
 * bits (result .o). Each 32-bit half is unshuffled with four swap steps
 * (distances 1, 2, 4, 8), which gathers its even bits in the low 16 bits and
 * its odd bits in the high 16 bits; the halves are then merged.
 */
u32_2 to_bit_interleaving(u64 in) {
  u32 upper = (u32)(in >> 32);
  u32 lower = (u32)in;
  u32 swap;
  u32_2 result;

  /* unshuffle the low 32 bits */
  swap = (lower ^ (lower >> 1)) & 0x22222222;
  lower ^= swap ^ (swap << 1);
  swap = (lower ^ (lower >> 2)) & 0x0C0C0C0C;
  lower ^= swap ^ (swap << 2);
  swap = (lower ^ (lower >> 4)) & 0x00F000F0;
  lower ^= swap ^ (swap << 4);
  swap = (lower ^ (lower >> 8)) & 0x0000FF00;
  lower ^= swap ^ (swap << 8);

  /* unshuffle the high 32 bits */
  swap = (upper ^ (upper >> 1)) & 0x22222222;
  upper ^= swap ^ (swap << 1);
  swap = (upper ^ (upper >> 2)) & 0x0C0C0C0C;
  upper ^= swap ^ (swap << 2);
  swap = (upper ^ (upper >> 4)) & 0x00F000F0;
  upper ^= swap ^ (swap << 4);
  swap = (upper ^ (upper >> 8)) & 0x0000FF00;
  upper ^= swap ^ (swap << 8);

  /* even bits of both halves -> e, odd bits of both halves -> o */
  result.e = (lower & 0x0000FFFF) | (upper << 16);
  result.o = (lower >> 16) | (upper & 0xFFFF0000);
  return result;
}
/*
 * Bit-interleave a word that was loaded in host order from a big-endian byte
 * string: U64BIG first turns it into the big-endian numeric value.
 */
u32_2 to_bit_interleaving_big(u64 in) {
  const u64 be_value = U64BIG(in);

  return to_bit_interleaving(be_value);
}
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
/*
 * Inverse of to_bit_interleaving: rebuild the 64-bit word from its even-bit
 * half (in.e) and odd-bit half (in.o). The halves are first regrouped per
 * 32-bit output word, then shuffled back with the swap steps in reverse
 * order (distances 8, 4, 2, 1).
 */
u64 from_bit_interleaving(u32_2 in) {
  u32 lower = (in.e & 0x0000FFFF) | (in.o << 16);
  u32 upper = (in.e >> 16) | (in.o & 0xFFFF0000);
  u32 swap;

  /* shuffle the low 32 bits */
  swap = (lower ^ (lower >> 8)) & 0x0000FF00;
  lower ^= swap ^ (swap << 8);
  swap = (lower ^ (lower >> 4)) & 0x00F000F0;
  lower ^= swap ^ (swap << 4);
  swap = (lower ^ (lower >> 2)) & 0x0C0C0C0C;
  lower ^= swap ^ (swap << 2);
  swap = (lower ^ (lower >> 1)) & 0x22222222;
  lower ^= swap ^ (swap << 1);

  /* shuffle the high 32 bits */
  swap = (upper ^ (upper >> 8)) & 0x0000FF00;
  upper ^= swap ^ (swap << 8);
  swap = (upper ^ (upper >> 4)) & 0x00F000F0;
  upper ^= swap ^ (swap << 4);
  swap = (upper ^ (upper >> 2)) & 0x0C0C0C0C;
  upper ^= swap ^ (swap << 2);
  swap = (upper ^ (upper >> 1)) & 0x22222222;
  upper ^= swap ^ (swap << 1);

  return (u64)upper << 32 | lower;
}
/*
 * De-interleave and convert with U64BIG, so that storing the result in host
 * order yields the word's big-endian byte string.
 */
u64 from_bit_interleaving_big(u32_2 in) {
  const u64 be_value = from_bit_interleaving(in);

  return U64BIG(be_value);
}
/*
 * Apply the last `rounds` of the 12 permutation rounds to *p, i.e. rounds
 * START_ROUND(rounds) .. 11. Works on a local copy and writes it back once.
 */
void P(state *p, u8 rounds) {
  /* ROUND expects these exact names: `s` for the state, t0..t4 as scratch */
  u32_2 t0, t1, t2, t3, t4;
  state s = *p;
  u32 round = START_ROUND(rounds);

  while (round < 12) {
    ROUND(constants[round][0], constants[round][1]);
    round++;
  }
  *p = s;
}
#ifndef PERMUTATIONS_H_
#define PERMUTATIONS_H_
/* Fixed-size integer aliases; u32 assumes `unsigned int` is 32 bits wide. */
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned long long u64;
/* One 64-bit word in bit-interleaved form: `e` collects the even-indexed
 * bits, `o` the odd-indexed bits (see to_bit_interleaving). */
typedef struct {
u32 e;
u32 o;
} u32_2;
/* Permutation state: five bit-interleaved 64-bit words. */
typedef struct {
u32_2 x0;
u32_2 x1;
u32_2 x2;
u32_2 x3;
u32_2 x4;
} state;
/* Byte n of 64-bit x, where n = 0 is the most significant byte. */
#define EXT_BYTE64(x, n) ((u8)((u64)(x) >> (8 * (7 - (n)))))
/* Byte value x moved to byte position n (n = 0 is the most significant). */
#define INS_BYTE64(x, n) ((u64)(x) << (8 * (7 - (n))))
/* Rotate a 32-bit value right by n; n must be in 1..31 (n == 0 would shift
 * by 32). */
#define ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
/* Index of the first round to run when x of the 12 rounds are wanted. */
#define START_ROUND(x) (12 - (x))
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
/* 64-bit value -> even/odd bit halves. */
u32_2 to_bit_interleaving(u64 in);
/* Same, after converting `in` with U64BIG. */
u32_2 to_bit_interleaving_big(u64 in);
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
/* Even/odd bit halves -> 64-bit value. */
u64 from_bit_interleaving(u32_2 in);
/* Same, with the result converted by U64BIG. */
u64 from_bit_interleaving_big(u32_2 in);
/* clang-format off */
/*
 * One permutation round on bit-interleaved words, as a statement macro.
 * Expects a `state s` and scratch `u32_2 t0, t1, t2, t3, t4` in the enclosing
 * scope. C_e / C_o are the even / odd halves of the round constant, XORed
 * into x2. The s-box layer snapshots all five words into t0..t4 first, so
 * each output is computed from pre-update values. A 64-bit rotation by r is
 * carried out as 32-bit rotations of the two halves, which is why odd
 * rotation amounts cross between .e and .o.
 */
#define ROUND(C_e, C_o) \
do { \
/* round constant */ \
s.x2.e ^= C_e; s.x2.o ^= C_o; \
/* s-box layer */ \
s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \
s.x4.e ^= s.x3.e; s.x4.o ^= s.x3.o; \
s.x2.e ^= s.x1.e; s.x2.o ^= s.x1.o; \
t0.e = s.x0.e; t0.o = s.x0.o; \
t4.e = s.x4.e; t4.o = s.x4.o; \
t3.e = s.x3.e; t3.o = s.x3.o; \
t1.e = s.x1.e; t1.o = s.x1.o; \
t2.e = s.x2.e; t2.o = s.x2.o; \
s.x0.e = t0.e ^ (~t1.e & t2.e); s.x0.o = t0.o ^ (~t1.o & t2.o); \
s.x2.e = t2.e ^ (~t3.e & t4.e); s.x2.o = t2.o ^ (~t3.o & t4.o); \
s.x4.e = t4.e ^ (~t0.e & t1.e); s.x4.o = t4.o ^ (~t0.o & t1.o); \
s.x1.e = t1.e ^ (~t2.e & t3.e); s.x1.o = t1.o ^ (~t2.o & t3.o); \
s.x3.e = t3.e ^ (~t4.e & t0.e); s.x3.o = t3.o ^ (~t4.o & t0.o); \
s.x1.e ^= s.x0.e; s.x1.o ^= s.x0.o; \
s.x3.e ^= s.x2.e; s.x3.o ^= s.x2.o; \
s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \
/* linear layer */ \
t0.e = s.x0.e ^ ROTR32(s.x0.o, 4); t0.o = s.x0.o ^ ROTR32(s.x0.e, 5); \
t1.e = s.x1.e ^ ROTR32(s.x1.e, 11); t1.o = s.x1.o ^ ROTR32(s.x1.o, 11); \
t2.e = s.x2.e ^ ROTR32(s.x2.o, 2); t2.o = s.x2.o ^ ROTR32(s.x2.e, 3); \
t3.e = s.x3.e ^ ROTR32(s.x3.o, 3); t3.o = s.x3.o ^ ROTR32(s.x3.e, 4); \
t4.e = s.x4.e ^ ROTR32(s.x4.e, 17); t4.o = s.x4.o ^ ROTR32(s.x4.o, 17); \
s.x0.e ^= ROTR32(t0.o, 9); s.x0.o ^= ROTR32(t0.e, 10); \
s.x1.e ^= ROTR32(t1.o, 19); s.x1.o ^= ROTR32(t1.e, 20); \
s.x2.e ^= t2.o; s.x2.o ^= ROTR32(t2.e, 1); \
s.x3.e ^= ROTR32(t3.e, 5); s.x3.o ^= ROTR32(t3.o, 5); \
s.x4.e ^= ROTR32(t4.o, 3); s.x4.o ^= ROTR32(t4.e, 4); \
s.x2.e = ~s.x2.e; s.x2.o = ~s.x2.o; \
} while(0)
/* clang-format on */
// Permutation entry point operating on the state at p. The definition is not
// part of this header; presumably `rounds` selects how many rounds are
// applied - confirm against the implementation.
void P(state *p, u8 rounds);
#endif // PERMUTATIONS_H_
// Size parameters of this AEAD implementation, in bytes.
// Key length.
#define CRYPTO_KEYBYTES 16
// Secret message number length (the nsec arguments of the encrypt/decrypt
// entry points are ignored).
#define CRYPTO_NSECBYTES 0
// Public nonce length.
#define CRYPTO_NPUBBYTES 16
// Length of the authentication tag appended to the ciphertext.
#define CRYPTO_ABYTES 16
// NOTE(review): presumably tells callers that input and output buffers must
// not overlap - confirm against the API convention this file follows.
#define CRYPTO_NOOVERLAP 1
#include <string.h>
#include "core.h"
/*
 * Absorbs `len` bytes from `in` into the rate words x0/x1 of the state.
 *
 * s    - cipher state, updated in place
 * out  - receives `len` bytes when mode is ASCON_ENC or ASCON_DEC; it is
 *        neither written nor advanced when mode is ASCON_AD, so a null
 *        pointer is fine there
 * in   - input bytes
 * len  - number of input bytes
 * mode - ASCON_AD:  XOR the input into the rate only
 *        ASCON_ENC: XOR the input into the rate and output the rate
 *        ASCON_DEC: as ASCON_ENC, then overwrite the rate with the input
 *
 * Every full RATE-byte block is followed by P(s, PB_ROUNDS). The trailing
 * partial block (0..RATE-1 bytes) is padded with 0x80 followed by zeros and
 * is NOT followed by a permutation; that is left to the caller.
 *
 * Bytes travel between the caller's buffers and a local u64 pair through
 * memcpy, so `in` and `out` need no particular alignment.
 */
void process_data(state* s, unsigned char* out, const unsigned char* in,
                  unsigned long long len, u8 mode) {
  u32_2 t0, t1;
  u64 words[2];            // one rate block as two native 64-bit words
  u8* bytes = (u8*)words;  // byte view of the same block

  while (len >= RATE) {
    memcpy(words, in, RATE);
    t0 = to_bit_interleaving_big(words[0]);
    s->x0.e ^= t0.e;
    s->x0.o ^= t0.o;
    t1 = to_bit_interleaving_big(words[1]);
    s->x1.e ^= t1.e;
    s->x1.o ^= t1.o;
    if (mode != ASCON_AD) {
      words[0] = from_bit_interleaving_big(s->x0);
      words[1] = from_bit_interleaving_big(s->x1);
      memcpy(out, words, RATE);
      // only advance a pointer that is actually in use (out is null for AD)
      out += RATE;
    }
    if (mode == ASCON_DEC) {
      s->x0 = t0;
      s->x1 = t1;
    }
    P(s, PB_ROUNDS);
    in += RATE;
    len -= RATE;
  }

  // final block: remaining bytes, then 0x80, then zeros
  memset(bytes, 0, RATE);
  if (len) {
    memcpy(bytes, in, len);
  }
  bytes[len] ^= 0x80;
  t0 = to_bit_interleaving_big(words[0]);
  s->x0.e ^= t0.e;
  s->x0.o ^= t0.o;
  t1 = to_bit_interleaving_big(words[1]);
  s->x1.e ^= t1.e;
  s->x1.o ^= t1.o;
  if (mode != ASCON_AD) {
    words[0] = from_bit_interleaving_big(s->x0);
    words[1] = from_bit_interleaving_big(s->x1);
    if (len) {
      memcpy(out, bytes, len);
    }
  }
  if (mode == ASCON_DEC) {
    // the first len rate bytes become the input bytes; the tail keeps the
    // value computed above
    if (len) {
      memcpy(bytes, in, len);
    }
    s->x0 = to_bit_interleaving_big(words[0]);
    s->x1 = to_bit_interleaving_big(words[1]);
  }
}
/*
 * Runs the whole cipher on one message and leaves the final state in *s.
 *
 * s     - state; fully overwritten during initialization
 * out   - output buffer of tlen bytes, passed on to process_data()
 * in    - tlen bytes of plaintext (ASCON_ENC) or ciphertext (ASCON_DEC)
 * ad    - adlen bytes of associated data; not read when adlen is 0
 * npub  - 16-byte nonce
 * k     - 16-byte key
 * mode  - ASCON_ENC or ASCON_DEC
 *
 * On return x3/x4 of the state hold the tag words; crypto_aead_encrypt()
 * and crypto_aead_decrypt() read them from there.
 *
 * Key and nonce are copied with memcpy before conversion, so k and npub
 * need no particular alignment.
 */
void ascon_core(state* s, unsigned char* out, const unsigned char* in,
                unsigned long long tlen, const unsigned char* ad,
                unsigned long long adlen, const unsigned char* npub,
                const unsigned char* k, u8 mode) {
  u32_2 K0, K1, N0, N1;
  u64 words[2];
  // load key and nonce
  memcpy(words, k, sizeof words);
  K0 = to_bit_interleaving_big(words[0]);
  K1 = to_bit_interleaving_big(words[1]);
  memcpy(words, npub, sizeof words);
  N0 = to_bit_interleaving_big(words[0]);
  N1 = to_bit_interleaving_big(words[1]);
  // initialization
  s->x0 = to_bit_interleaving_big(IV);
  s->x1.o = K0.o;
  s->x1.e = K0.e;
  s->x2.e = K1.e;
  s->x2.o = K1.o;
  s->x3.e = N0.e;
  s->x3.o = N0.o;
  s->x4.e = N1.e;
  s->x4.o = N1.o;
  P(s, PA_ROUNDS);
  s->x3.e ^= K0.e;
  s->x3.o ^= K0.o;
  s->x4.e ^= K1.e;
  s->x4.o ^= K1.o;
  // process associated data
  if (adlen) {
    // process_data() does not permute after the final padded block
    process_data(s, (void*)0, ad, adlen, ASCON_AD);
    P(s, PB_ROUNDS);
  }
  // flip bit 0 of x4.e before the message is processed
  s->x4.e ^= 1;
  // process plaintext/ciphertext
  process_data(s, out, in, tlen, mode);
  // finalization
  s->x2.e ^= K0.e;
  s->x2.o ^= K0.o;
  s->x3.e ^= K1.e;
  s->x3.o ^= K1.o;
  P(s, PA_ROUNDS);
  s->x3.e ^= K0.e;
  s->x3.o ^= K0.o;
  s->x4.e ^= K1.e;
  s->x4.o ^= K1.o;
}
#ifndef CORE_H_
#define CORE_H_
#include "api.h"
#include "endian.h"
#include "permutations.h"
// Values for the `mode` argument of process_data() / ascon_core().
#define ASCON_AD 0
#define ASCON_ENC 1
#define ASCON_DEC 2
// Rate in bytes: the number of bytes absorbed per block.
#define RATE (128 / 8)
// Round counts handed to P(): PA_ROUNDS for initialization and
// finalization, PB_ROUNDS between data blocks.
#define PA_ROUNDS 12
#define PB_ROUNDS 8
// Initial value of state word x0: key size in bits, rate in bits, PA_ROUNDS
// and PB_ROUNDS, one byte each, followed by four zero bytes.
// to_bit_interleaving_big() applies U64BIG to its argument, so the word is
// written in big-endian form and wrapped in U64BIG here. The two swaps
// cancel, which makes the initial state the same on little- and big-endian
// hosts; on little-endian hosts the value equals the previous definition.
#define IV                                                                 \
  U64BIG(((u64)(8 * (CRYPTO_KEYBYTES)) << 56 | (u64)(8 * (RATE)) << 48 |   \
          (u64)(PA_ROUNDS) << 40 | (u64)(PB_ROUNDS) << 32))
// Absorbs len bytes of `in` into the state; writes len bytes to `out`
// unless mode is ASCON_AD.
void process_data(state* s, unsigned char* out, const unsigned char* in,
                  unsigned long long len, u8 mode);
// Runs initialization, associated data, message processing and finalization;
// the tag words are left in x3/x4 of *s.
void ascon_core(state* s, unsigned char* out, const unsigned char* in,
                unsigned long long tlen, const unsigned char* ad,
                unsigned long long adlen, const unsigned char* npub,
                const unsigned char* k, u8 mode);
#endif  // CORE_H_
#include "core.h"
int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen,
unsigned char* nsec, const unsigned char* c,
unsigned long long clen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k) {
if (clen < CRYPTO_ABYTES) {
*mlen = 0;
return -1;
}
state s;
u32_2 t0, t1;
(void)nsec;
// set plaintext size
*mlen = clen - CRYPTO_ABYTES;
ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DEC);
// verify tag (should be constant time, check compiler output)
t0 = to_bit_interleaving_big((*(u64*)(c + *mlen)));
t1 = to_bit_interleaving_big((*(u64*)(c + *mlen + 8)));
if (((s.x3.e ^ t0.e) | (s.x3.o ^ t0.o) | (s.x4.e ^ t1.e) | (s.x4.o ^ t1.o)) !=
0) {
*mlen = 0;
return -1;
}
return 0;
}
#include "core.h"
/*
 * Encrypts m[0 .. mlen) into c and appends the CRYPTO_ABYTES-byte tag, so
 * c must have room for mlen + CRYPTO_ABYTES bytes. *clen receives that
 * total. Always returns 0. nsec is ignored.
 */
int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen,
                        const unsigned char* m, unsigned long long mlen,
                        const unsigned char* ad, unsigned long long adlen,
                        const unsigned char* nsec, const unsigned char* npub,
                        const unsigned char* k) {
  state s;
  u64 tag[2];
  const unsigned char* tag_bytes = (const unsigned char*)tag;
  unsigned int i;
  (void)nsec;
  // set ciphertext size
  *clen = mlen + CRYPTO_ABYTES;
  ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENC);
  // set tag
  tag[0] = from_bit_interleaving_big(s.x3);
  tag[1] = from_bit_interleaving_big(s.x4);
  // c + mlen has no guaranteed alignment, so the tag is stored byte by byte
  // rather than through a u64 pointer.
  for (i = 0; i < (unsigned int)sizeof tag; i++) {
    c[mlen + i] = tag_bytes[i];
  }
  return 0;
}
#ifndef ENDIAN_H_
#define ENDIAN_H_
// U64BIG / U32BIG / U16BIG convert a value between the host's native byte
// order and big-endian byte order. The build fails with #error when the
// byte order cannot be determined.
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
// macros for big endian machines
// (the conversions are the identity)
#define U64BIG(x) (x)
#define U32BIG(x) (x)
#define U16BIG(x) (x)
#elif defined(_MSC_VER) || \
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
// macros for little endian machines
// (byte reversal; the argument x is expanded once per byte, so it should be
// free of side effects; _MSC_VER builds are treated as little endian)
#define U64BIG(x) \
((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) | \
(((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) | \
(((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) | \
(((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56))
#define U32BIG(x) \
((((x)&0x000000FF) << 24) | (((x)&0x0000FF00) << 8) | \
(((x)&0x00FF0000) >> 8) | (((x)&0xFF000000) >> 24))
#define U16BIG(x) ((((x)&0x00FF) << 8) | (((x)&0xFF00) >> 8))
#else
#error "ascon byte order macros not defined in endian.h"
#endif
#endif // ENDIAN_H_
Christoph Dobraunig
Martin Schläffer
Ferdinand Bachmann
#include "endian.h"
#include "permutations.h"
// Round constants, one pair per round index 0..11. P() passes element [0]
// as C_e and element [1] as C_o to ROUND, which XORs them into x2.e / x2.o.
static const u8 constants[][2] = {
{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9}, {0x6, 0xc}, {0x3, 0xc},
{0x6, 0x9}, {0x3, 0x9}, {0xc, 0x6}, {0x9, 0x6}, {0xc, 0x3}, {0x9, 0x3}};
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
// Converts `in` with U64BIG, then regroups the bits of each 32-bit half with
// four swap steps and combines the halves into the .e / .o words of the
// result.
u32_2 to_bit_interleaving_big(u64 in) {
  u32_2 result;
  u32 upper, lower, swap;

  in = U64BIG(in);
  upper = (u32)(in >> 32);
  lower = (u32)in;

  // lower half
  swap = (lower ^ (lower >> 1)) & 0x22222222;
  lower ^= swap ^ (swap << 1);
  swap = (lower ^ (lower >> 2)) & 0x0C0C0C0C;
  lower ^= swap ^ (swap << 2);
  swap = (lower ^ (lower >> 4)) & 0x00F000F0;
  lower ^= swap ^ (swap << 4);
  swap = (lower ^ (lower >> 8)) & 0x0000FF00;
  lower ^= swap ^ (swap << 8);

  // upper half, same steps
  swap = (upper ^ (upper >> 1)) & 0x22222222;
  upper ^= swap ^ (swap << 1);
  swap = (upper ^ (upper >> 2)) & 0x0C0C0C0C;
  upper ^= swap ^ (swap << 2);
  swap = (upper ^ (upper >> 4)) & 0x00F000F0;
  upper ^= swap ^ (swap << 4);
  swap = (upper ^ (upper >> 8)) & 0x0000FF00;
  upper ^= swap ^ (swap << 8);

  result.e = (lower & 0x0000FFFF) | (upper << 16);
  result.o = (lower >> 16) | (upper & 0xFFFF0000);
  return result;
}
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
// Inverse of to_bit_interleaving_big(): rebuilds the two 32-bit halves from
// the .e / .o words, undoes the swap steps in reverse order and applies
// U64BIG to the combined 64-bit value.
u64 from_bit_interleaving_big(u32_2 in) {
  u32 lower = (in.e & 0x0000FFFF) | (in.o << 16);
  u32 upper = (in.e >> 16) | (in.o & 0xFFFF0000);
  u32 swap;
  u64 word;

  // lower half
  swap = (lower ^ (lower >> 8)) & 0x0000FF00;
  lower ^= swap ^ (swap << 8);
  swap = (lower ^ (lower >> 4)) & 0x00F000F0;
  lower ^= swap ^ (swap << 4);
  swap = (lower ^ (lower >> 2)) & 0x0C0C0C0C;
  lower ^= swap ^ (swap << 2);
  swap = (lower ^ (lower >> 1)) & 0x22222222;
  lower ^= swap ^ (swap << 1);

  // upper half, same steps
  swap = (upper ^ (upper >> 8)) & 0x0000FF00;
  upper ^= swap ^ (swap << 8);
  swap = (upper ^ (upper >> 4)) & 0x00F000F0;
  upper ^= swap ^ (swap << 4);
  swap = (upper ^ (upper >> 2)) & 0x0C0C0C0C;
  upper ^= swap ^ (swap << 2);
  swap = (upper ^ (upper >> 1)) & 0x22222222;
  upper ^= swap ^ (swap << 1);

  word = (u64)upper << 32 | lower;
  return U64BIG(word);
}
// Applies ROUND for round indices START_ROUND(rounds) .. 11 to the state at
// p. The state is copied into the local `s` (the name ROUND expects) and
// written back at the end.
void P(state *p, u8 rounds) {
  u32_2 t0, t1, t2, t3, t4;
  state s = *p;
  u32 i = START_ROUND(rounds);
  while (i < 12) {
    ROUND(constants[i][0], constants[i][1]);
    i++;
  }
  *p = s;
}
#ifndef PERMUTATIONS_H_
#define PERMUTATIONS_H_
// Fixed-size unsigned integer aliases used throughout the implementation.
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned long long u64;
// One 64-bit state word stored as two 32-bit words, .e and .o
// (to_bit_interleaving_big() produces this form).
typedef struct {
u32 e;
u32 o;
} u32_2;
// The cipher state: five 64-bit words x0..x4 in u32_2 form.
typedef struct {
u32_2 x0;
u32_2 x1;
u32_2 x2;
u32_2 x3;
u32_2 x4;
} state;
// Extracts byte n of 64-bit word x, where n = 0 is the most significant byte.
#define EXT_BYTE64(x, n) ((u8)((u64)(x) >> (8 * (7 - (n)))))
// Shifts byte value x into byte position n (n = 0: most significant byte).
#define INS_BYTE64(x, n) ((u64)(x) << (8 * (7 - (n))))
// Rotates x right by n bits; assumes x is a 32-bit unsigned value.
// NOTE(review): n must stay in 1..31 - n == 0 would shift left by 32.
#define ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
// Index of the first round to run when x of the 12 rounds are requested.
#define START_ROUND(x) (12 - (x))
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
// Converts a 64-bit word into u32_2 form.
u32_2 to_bit_interleaving_big(u64 in);
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
// Converts a u32_2 pair back into a 64-bit word.
u64 from_bit_interleaving_big(u32_2 in);
/* clang-format off */
/*
 * One permutation round. The macro operates on a `state` variable that must
 * be named `s` in the enclosing scope and uses u32_2 temporaries t0..t4 that
 * must be declared there as well. C_e and C_o are XORed into s.x2.e and
 * s.x2.o as the round constant; the s-box layer and the linear layer follow,
 * and s.x2 is complemented at the end.
 */
#define ROUND(C_e, C_o) \
do { \
/* round constant */ \
s.x2.e ^= C_e; s.x2.o ^= C_o; \
/* s-box layer */ \
s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \
s.x4.e ^= s.x3.e; s.x4.o ^= s.x3.o; \
s.x2.e ^= s.x1.e; s.x2.o ^= s.x1.o; \
t0.e = s.x0.e; t0.o = s.x0.o; \
t4.e = s.x4.e; t4.o = s.x4.o; \
t3.e = s.x3.e; t3.o = s.x3.o; \
t1.e = s.x1.e; t1.o = s.x1.o; \
t2.e = s.x2.e; t2.o = s.x2.o; \
s.x0.e = t0.e ^ (~t1.e & t2.e); s.x0.o = t0.o ^ (~t1.o & t2.o); \
s.x2.e = t2.e ^ (~t3.e & t4.e); s.x2.o = t2.o ^ (~t3.o & t4.o); \
s.x4.e = t4.e ^ (~t0.e & t1.e); s.x4.o = t4.o ^ (~t0.o & t1.o); \
s.x1.e = t1.e ^ (~t2.e & t3.e); s.x1.o = t1.o ^ (~t2.o & t3.o); \
s.x3.e = t3.e ^ (~t4.e & t0.e); s.x3.o = t3.o ^ (~t4.o & t0.o); \
s.x1.e ^= s.x0.e; s.x1.o ^= s.x0.o; \
s.x3.e ^= s.x2.e; s.x3.o ^= s.x2.o; \
s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \
/* linear layer */ \
t0.e = s.x0.e ^ ROTR32(s.x0.o, 4); t0.o = s.x0.o ^ ROTR32(s.x0.e, 5); \
t1.e = s.x1.e ^ ROTR32(s.x1.e, 11); t1.o = s.x1.o ^ ROTR32(s.x1.o, 11); \
t2.e = s.x2.e ^ ROTR32(s.x2.o, 2); t2.o = s.x2.o ^ ROTR32(s.x2.e, 3); \
t3.e = s.x3.e ^ ROTR32(s.x3.o, 3); t3.o = s.x3.o ^ ROTR32(s.x3.e, 4); \
t4.e = s.x4.e ^ ROTR32(s.x4.e, 17); t4.o = s.x4.o ^ ROTR32(s.x4.o, 17); \
s.x0.e ^= ROTR32(t0.o, 9); s.x0.o ^= ROTR32(t0.e, 10); \
s.x1.e ^= ROTR32(t1.o, 19); s.x1.o ^= ROTR32(t1.e, 20); \
s.x2.e ^= t2.o; s.x2.o ^= ROTR32(t2.e, 1); \
s.x3.e ^= ROTR32(t3.e, 5); s.x3.o ^= ROTR32(t3.o, 5); \
s.x4.e ^= ROTR32(t4.o, 3); s.x4.o ^= ROTR32(t4.e, 4); \
s.x2.e = ~s.x2.e; s.x2.o = ~s.x2.o; \
} while(0)
/* clang-format on */
// Applies the rounds with indices START_ROUND(rounds) .. 11 to the state at p.
void P(state *p, u8 rounds);
#endif // PERMUTATIONS_H_
// Size parameters of this AEAD implementation, in bytes.
// Key length.
#define CRYPTO_KEYBYTES 16
// Secret message number length (the nsec arguments of the encrypt/decrypt
// entry points are ignored).
#define CRYPTO_NSECBYTES 0
// Public nonce length.
#define CRYPTO_NPUBBYTES 16
// Length of the authentication tag appended to the ciphertext.
#define CRYPTO_ABYTES 16
// NOTE(review): presumably tells callers that input and output buffers must
// not overlap - confirm against the API convention this file follows.
#define CRYPTO_NOOVERLAP 1
#include <string.h>
#include "core.h"
/*
 * Absorbs `len` bytes from `in` into the rate words x0/x1 of the state.
 *
 * s    - cipher state, updated in place
 * out  - receives `len` bytes when mode is ASCON_ENC or ASCON_DEC; it is
 *        neither written nor advanced when mode is ASCON_AD, so a null
 *        pointer is fine there
 * in   - input bytes
 * len  - number of input bytes
 * mode - ASCON_AD:  XOR the input into the rate only
 *        ASCON_ENC: XOR the input into the rate and output the rate
 *        ASCON_DEC: as ASCON_ENC, then overwrite the rate with the input
 *
 * Every full RATE-byte block is followed by P(s, PB_ROUNDS). The trailing
 * partial block (0..RATE-1 bytes) is padded with 0x80 followed by zeros and
 * is NOT followed by a permutation; that is left to the caller.
 *
 * Bytes travel between the caller's buffers and a local u64 pair through
 * memcpy, so `in` and `out` need no particular alignment.
 */
void process_data(state* s, unsigned char* out, const unsigned char* in,
                  unsigned long long len, u8 mode) {
  u32_2 t0, t1;
  u64 words[2];            // one rate block as two native 64-bit words
  u8* bytes = (u8*)words;  // byte view of the same block

  while (len >= RATE) {
    memcpy(words, in, RATE);
    t0 = to_bit_interleaving_big(words[0]);
    s->x0.e ^= t0.e;
    s->x0.o ^= t0.o;
    t1 = to_bit_interleaving_big(words[1]);
    s->x1.e ^= t1.e;
    s->x1.o ^= t1.o;
    if (mode != ASCON_AD) {
      words[0] = from_bit_interleaving_big(s->x0);
      words[1] = from_bit_interleaving_big(s->x1);
      memcpy(out, words, RATE);
      // only advance a pointer that is actually in use (out is null for AD)
      out += RATE;
    }
    if (mode == ASCON_DEC) {
      s->x0 = t0;
      s->x1 = t1;
    }
    P(s, PB_ROUNDS);
    in += RATE;
    len -= RATE;
  }

  // final block: remaining bytes, then 0x80, then zeros
  memset(bytes, 0, RATE);
  if (len) {
    memcpy(bytes, in, len);
  }
  bytes[len] ^= 0x80;
  t0 = to_bit_interleaving_big(words[0]);
  s->x0.e ^= t0.e;
  s->x0.o ^= t0.o;
  t1 = to_bit_interleaving_big(words[1]);
  s->x1.e ^= t1.e;
  s->x1.o ^= t1.o;
  if (mode != ASCON_AD) {
    words[0] = from_bit_interleaving_big(s->x0);
    words[1] = from_bit_interleaving_big(s->x1);
    if (len) {
      memcpy(out, bytes, len);
    }
  }
  if (mode == ASCON_DEC) {
    // the first len rate bytes become the input bytes; the tail keeps the
    // value computed above
    if (len) {
      memcpy(bytes, in, len);
    }
    s->x0 = to_bit_interleaving_big(words[0]);
    s->x1 = to_bit_interleaving_big(words[1]);
  }
}
/*
 * Runs the whole cipher on one message and leaves the final state in *s.
 *
 * s     - state; fully overwritten during initialization
 * out   - output buffer of tlen bytes, passed on to process_data()
 * in    - tlen bytes of plaintext (ASCON_ENC) or ciphertext (ASCON_DEC)
 * ad    - adlen bytes of associated data; not read when adlen is 0
 * npub  - 16-byte nonce
 * k     - 16-byte key
 * mode  - ASCON_ENC or ASCON_DEC
 *
 * On return x3/x4 of the state hold the tag words; crypto_aead_encrypt()
 * and crypto_aead_decrypt() read them from there.
 *
 * Key and nonce are copied with memcpy before conversion, so k and npub
 * need no particular alignment.
 */
void ascon_core(state* s, unsigned char* out, const unsigned char* in,
                unsigned long long tlen, const unsigned char* ad,
                unsigned long long adlen, const unsigned char* npub,
                const unsigned char* k, u8 mode) {
  u32_2 K0, K1, N0, N1;
  u64 words[2];
  // load key and nonce
  memcpy(words, k, sizeof words);
  K0 = to_bit_interleaving_big(words[0]);
  K1 = to_bit_interleaving_big(words[1]);
  memcpy(words, npub, sizeof words);
  N0 = to_bit_interleaving_big(words[0]);
  N1 = to_bit_interleaving_big(words[1]);
  // initialization
  to_bit_interleaving_big_immediate(s->x0, IV);
  s->x1.o = K0.o;
  s->x1.e = K0.e;
  s->x2.e = K1.e;
  s->x2.o = K1.o;
  s->x3.e = N0.e;
  s->x3.o = N0.o;
  s->x4.e = N1.e;
  s->x4.o = N1.o;
  P(s, PA_ROUNDS);
  s->x3.e ^= K0.e;
  s->x3.o ^= K0.o;
  s->x4.e ^= K1.e;
  s->x4.o ^= K1.o;
  // process associated data
  if (adlen) {
    // process_data() does not permute after the final padded block
    process_data(s, (void*)0, ad, adlen, ASCON_AD);
    P(s, PB_ROUNDS);
  }
  // flip bit 0 of x4.e before the message is processed
  s->x4.e ^= 1;
  // process plaintext/ciphertext
  process_data(s, out, in, tlen, mode);
  // finalization
  s->x2.e ^= K0.e;
  s->x2.o ^= K0.o;
  s->x3.e ^= K1.e;
  s->x3.o ^= K1.o;
  P(s, PA_ROUNDS);
  s->x3.e ^= K0.e;
  s->x3.o ^= K0.o;
  s->x4.e ^= K1.e;
  s->x4.o ^= K1.o;
}
#ifndef CORE_H_
#define CORE_H_
#include "api.h"
#include "endian.h"
#include "permutations.h"
// Values for the `mode` argument of process_data() / ascon_core().
#define ASCON_AD 0
#define ASCON_ENC 1
#define ASCON_DEC 2
// Rate in bytes: the number of bytes absorbed per block.
#define RATE (128 / 8)
// Round counts handed to P(): PA_ROUNDS for initialization and
// finalization, PB_ROUNDS between data blocks.
#define PA_ROUNDS 12
#define PB_ROUNDS 8
// Initial value of state word x0: key size in bits, rate in bits, PA_ROUNDS
// and PB_ROUNDS, one byte each, followed by four zero bytes.
// to_bit_interleaving_big_immediate() applies U64BIG to its input, so the
// word is written in big-endian form and wrapped in U64BIG here. The two
// swaps cancel, which makes the initial state the same on little- and
// big-endian hosts; on little-endian hosts the value equals the previous
// definition.
#define IV                                                                 \
  U64BIG(((u64)(8 * (CRYPTO_KEYBYTES)) << 56 | (u64)(8 * (RATE)) << 48 |   \
          (u64)(PA_ROUNDS) << 40 | (u64)(PB_ROUNDS) << 32))
// Absorbs len bytes of `in` into the state; writes len bytes to `out`
// unless mode is ASCON_AD.
void process_data(state* s, unsigned char* out, const unsigned char* in,
                  unsigned long long len, u8 mode);
// Runs initialization, associated data, message processing and finalization;
// the tag words are left in x3/x4 of *s.
void ascon_core(state* s, unsigned char* out, const unsigned char* in,
                unsigned long long tlen, const unsigned char* ad,
                unsigned long long adlen, const unsigned char* npub,
                const unsigned char* k, u8 mode);
#endif  // CORE_H_
#include "core.h"
int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen,
unsigned char* nsec, const unsigned char* c,
unsigned long long clen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k) {
if (clen < CRYPTO_ABYTES) {
*mlen = 0;
return -1;
}
state s;
u32_2 t0, t1;
(void)nsec;
// set plaintext size
*mlen = clen - CRYPTO_ABYTES;
ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DEC);
// verify tag (should be constant time, check compiler output)
t0 = to_bit_interleaving_big((*(u64*)(c + *mlen)));
t1 = to_bit_interleaving_big((*(u64*)(c + *mlen + 8)));
if (((s.x3.e ^ t0.e) | (s.x3.o ^ t0.o) | (s.x4.e ^ t1.e) | (s.x4.o ^ t1.o)) !=
0) {
*mlen = 0;
return -1;
}
return 0;
}
#include "core.h"
/*
 * Encrypts m[0 .. mlen) into c and appends the CRYPTO_ABYTES-byte tag, so
 * c must have room for mlen + CRYPTO_ABYTES bytes. *clen receives that
 * total. Always returns 0. nsec is ignored.
 */
int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen,
                        const unsigned char* m, unsigned long long mlen,
                        const unsigned char* ad, unsigned long long adlen,
                        const unsigned char* nsec, const unsigned char* npub,
                        const unsigned char* k) {
  state s;
  u64 tag[2];
  const unsigned char* tag_bytes = (const unsigned char*)tag;
  unsigned int i;
  (void)nsec;
  // set ciphertext size
  *clen = mlen + CRYPTO_ABYTES;
  ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENC);
  // set tag
  tag[0] = from_bit_interleaving_big(s.x3);
  tag[1] = from_bit_interleaving_big(s.x4);
  // c + mlen has no guaranteed alignment, so the tag is stored byte by byte
  // rather than through a u64 pointer.
  for (i = 0; i < (unsigned int)sizeof tag; i++) {
    c[mlen + i] = tag_bytes[i];
  }
  return 0;
}
#ifndef ENDIAN_H_
#define ENDIAN_H_
// U64BIG / U32BIG / U16BIG convert a value between the host's native byte
// order and big-endian byte order. The build fails with #error when the
// byte order cannot be determined.
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
// macros for big endian machines
// (the conversions are the identity)
#define U64BIG(x) (x)
#define U32BIG(x) (x)
#define U16BIG(x) (x)
#elif defined(_MSC_VER) || \
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
// macros for little endian machines
// (byte reversal; the argument x is expanded once per byte, so it should be
// free of side effects; _MSC_VER builds are treated as little endian)
#define U64BIG(x) \
((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) | \
(((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) | \
(((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) | \
(((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56))
#define U32BIG(x) \
((((x)&0x000000FF) << 24) | (((x)&0x0000FF00) << 8) | \
(((x)&0x00FF0000) >> 8) | (((x)&0xFF000000) >> 24))
#define U16BIG(x) ((((x)&0x00FF) << 8) | (((x)&0xFF00) >> 8))
#else
#error "ascon byte order macros not defined in endian.h"
#endif
#endif // ENDIAN_H_
Christoph Dobraunig
Martin Schläffer
Ferdinand Bachmann
#include "endian.h"
#include "permutations.h"
// Round constants, one pair per round index 0..11. P() passes element [0]
// as C_e and element [1] as C_o to ROUND, which XORs them into x2.e / x2.o.
static const u8 constants[][2] = {
{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9}, {0x6, 0xc}, {0x3, 0xc},
{0x6, 0x9}, {0x3, 0x9}, {0xc, 0x6}, {0x9, 0x6}, {0xc, 0x3}, {0x9, 0x3}};
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
// Function form of to_bit_interleaving_big_immediate(): converts the 64-bit
// word `in` and returns the resulting u32_2 pair by value.
u32_2 to_bit_interleaving_big(u64 in) {
  u32_2 interleaved;
  to_bit_interleaving_big_immediate(interleaved, in);
  return interleaved;
}
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
// Function form of from_bit_interleaving_big_immediate(): converts the
// u32_2 pair `in` and returns the resulting 64-bit word by value.
u64 from_bit_interleaving_big(u32_2 in) {
  u64 word;
  from_bit_interleaving_big_immediate(word, in);
  return word;
}
// Applies ROUND for round indices START_ROUND(rounds) .. 11 to the state at
// p. The state is copied into the local `s` (the name ROUND expects) and
// written back at the end.
void P(state *p, u8 rounds) {
  u32_2 t0, t1, t2, t3, t4;
  state s = *p;
  u32 i = START_ROUND(rounds);
  while (i < 12) {
    ROUND(constants[i][0], constants[i][1]);
    i++;
  }
  *p = s;
}
#ifndef PERMUTATIONS_H_
#define PERMUTATIONS_H_
// Fixed-size unsigned integer aliases used throughout the implementation.
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned long long u64;
// One 64-bit state word stored as two 32-bit words, .e and .o
// (to_bit_interleaving_big_immediate() produces this form).
typedef struct {
u32 e;
u32 o;
} u32_2;
// The cipher state: five 64-bit words x0..x4 in u32_2 form.
typedef struct {
u32_2 x0;
u32_2 x1;
u32_2 x2;
u32_2 x3;
u32_2 x4;
} state;
// Extracts byte n of 64-bit word x, where n = 0 is the most significant byte.
#define EXT_BYTE64(x, n) ((u8)((u64)(x) >> (8 * (7 - (n)))))
// Shifts byte value x into byte position n (n = 0: most significant byte).
#define INS_BYTE64(x, n) ((u64)(x) << (8 * (7 - (n))))
// Rotates x right by n bits; assumes x is a 32-bit unsigned value.
// NOTE(review): n must stay in 1..31 - n == 0 would shift left by 32.
#define ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
// Index of the first round to run when x of the 12 rounds are requested.
#define START_ROUND(x) (12 - (x))
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
// Function form of to_bit_interleaving_big_immediate().
u32_2 to_bit_interleaving_big(u64 in);
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
// Function form of from_bit_interleaving_big_immediate().
u64 from_bit_interleaving_big(u32_2 in);
/*
 * Converts the 64-bit value `in` into the u32_2 lvalue `out`.
 * U64BIG (endian.h) must be visible where the macro is used.
 * `in` is evaluated once; `out` may be any lvalue expression (it is
 * parenthesized) and is assigned field by field. The locals carry a bi_
 * prefix and trailing underscore so they cannot capture caller names such
 * as hi/lo.
 */
#define to_bit_interleaving_big_immediate(out, in) \
do { \
u64 bi_raw_ = (in); \
u64 bi_in_ = U64BIG(bi_raw_); \
u32 bi_hi_ = (u32)(bi_in_ >> 32); \
u32 bi_lo_ = (u32)(bi_in_); \
u32 bi_r0_, bi_r1_; \
bi_r0_ = (bi_lo_ ^ (bi_lo_ >> 1)) & 0x22222222, bi_lo_ ^= bi_r0_ ^ (bi_r0_ << 1); \
bi_r0_ = (bi_lo_ ^ (bi_lo_ >> 2)) & 0x0C0C0C0C, bi_lo_ ^= bi_r0_ ^ (bi_r0_ << 2); \
bi_r0_ = (bi_lo_ ^ (bi_lo_ >> 4)) & 0x00F000F0, bi_lo_ ^= bi_r0_ ^ (bi_r0_ << 4); \
bi_r0_ = (bi_lo_ ^ (bi_lo_ >> 8)) & 0x0000FF00, bi_lo_ ^= bi_r0_ ^ (bi_r0_ << 8); \
bi_r1_ = (bi_hi_ ^ (bi_hi_ >> 1)) & 0x22222222, bi_hi_ ^= bi_r1_ ^ (bi_r1_ << 1); \
bi_r1_ = (bi_hi_ ^ (bi_hi_ >> 2)) & 0x0C0C0C0C, bi_hi_ ^= bi_r1_ ^ (bi_r1_ << 2); \
bi_r1_ = (bi_hi_ ^ (bi_hi_ >> 4)) & 0x00F000F0, bi_hi_ ^= bi_r1_ ^ (bi_r1_ << 4); \
bi_r1_ = (bi_hi_ ^ (bi_hi_ >> 8)) & 0x0000FF00, bi_hi_ ^= bi_r1_ ^ (bi_r1_ << 8); \
(out).e = (bi_lo_ & 0x0000FFFF) | (bi_hi_ << 16); \
(out).o = (bi_lo_ >> 16) | (bi_hi_ & 0xFFFF0000); \
} while (0)
/*
 * Inverse of to_bit_interleaving_big_immediate(): converts the u32_2
 * expression `in` into the u64 lvalue `out`.
 * U64BIG (endian.h) must be visible where the macro is used.
 * `in` is read four times and should be free of side effects; `out` is
 * assigned exactly once.
 */
#define from_bit_interleaving_big_immediate(out, in) \
do { \
u32 bi_lo_ = ((in).e & 0x0000FFFF) | ((in).o << 16); \
u32 bi_hi_ = ((in).e >> 16) | ((in).o & 0xFFFF0000); \
u32 bi_r0_, bi_r1_; \
u64 bi_out_; \
bi_r0_ = (bi_lo_ ^ (bi_lo_ >> 8)) & 0x0000FF00, bi_lo_ ^= bi_r0_ ^ (bi_r0_ << 8); \
bi_r0_ = (bi_lo_ ^ (bi_lo_ >> 4)) & 0x00F000F0, bi_lo_ ^= bi_r0_ ^ (bi_r0_ << 4); \
bi_r0_ = (bi_lo_ ^ (bi_lo_ >> 2)) & 0x0C0C0C0C, bi_lo_ ^= bi_r0_ ^ (bi_r0_ << 2); \
bi_r0_ = (bi_lo_ ^ (bi_lo_ >> 1)) & 0x22222222, bi_lo_ ^= bi_r0_ ^ (bi_r0_ << 1); \
bi_r1_ = (bi_hi_ ^ (bi_hi_ >> 8)) & 0x0000FF00, bi_hi_ ^= bi_r1_ ^ (bi_r1_ << 8); \
bi_r1_ = (bi_hi_ ^ (bi_hi_ >> 4)) & 0x00F000F0, bi_hi_ ^= bi_r1_ ^ (bi_r1_ << 4); \
bi_r1_ = (bi_hi_ ^ (bi_hi_ >> 2)) & 0x0C0C0C0C, bi_hi_ ^= bi_r1_ ^ (bi_r1_ << 2); \
bi_r1_ = (bi_hi_ ^ (bi_hi_ >> 1)) & 0x22222222, bi_hi_ ^= bi_r1_ ^ (bi_r1_ << 1); \
bi_out_ = (u64)bi_hi_ << 32 | bi_lo_; \
(out) = U64BIG(bi_out_); \
} while (0)
/* clang-format off */
/*
 * One permutation round. The macro operates on a `state` variable that must
 * be named `s` in the enclosing scope and uses u32_2 temporaries t0..t4 that
 * must be declared there as well. C_e and C_o are XORed into s.x2.e and
 * s.x2.o as the round constant; the s-box layer and the linear layer follow,
 * and s.x2 is complemented at the end.
 */
#define ROUND(C_e, C_o) \
do { \
/* round constant */ \
s.x2.e ^= C_e; s.x2.o ^= C_o; \
/* s-box layer */ \
s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \
s.x4.e ^= s.x3.e; s.x4.o ^= s.x3.o; \
s.x2.e ^= s.x1.e; s.x2.o ^= s.x1.o; \
t0.e = s.x0.e; t0.o = s.x0.o; \
t4.e = s.x4.e; t4.o = s.x4.o; \
t3.e = s.x3.e; t3.o = s.x3.o; \
t1.e = s.x1.e; t1.o = s.x1.o; \
t2.e = s.x2.e; t2.o = s.x2.o; \
s.x0.e = t0.e ^ (~t1.e & t2.e); s.x0.o = t0.o ^ (~t1.o & t2.o); \
s.x2.e = t2.e ^ (~t3.e & t4.e); s.x2.o = t2.o ^ (~t3.o & t4.o); \
s.x4.e = t4.e ^ (~t0.e & t1.e); s.x4.o = t4.o ^ (~t0.o & t1.o); \
s.x1.e = t1.e ^ (~t2.e & t3.e); s.x1.o = t1.o ^ (~t2.o & t3.o); \
s.x3.e = t3.e ^ (~t4.e & t0.e); s.x3.o = t3.o ^ (~t4.o & t0.o); \
s.x1.e ^= s.x0.e; s.x1.o ^= s.x0.o; \
s.x3.e ^= s.x2.e; s.x3.o ^= s.x2.o; \
s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \
/* linear layer */ \
t0.e = s.x0.e ^ ROTR32(s.x0.o, 4); t0.o = s.x0.o ^ ROTR32(s.x0.e, 5); \
t1.e = s.x1.e ^ ROTR32(s.x1.e, 11); t1.o = s.x1.o ^ ROTR32(s.x1.o, 11); \
t2.e = s.x2.e ^ ROTR32(s.x2.o, 2); t2.o = s.x2.o ^ ROTR32(s.x2.e, 3); \
t3.e = s.x3.e ^ ROTR32(s.x3.o, 3); t3.o = s.x3.o ^ ROTR32(s.x3.e, 4); \
t4.e = s.x4.e ^ ROTR32(s.x4.e, 17); t4.o = s.x4.o ^ ROTR32(s.x4.o, 17); \
s.x0.e ^= ROTR32(t0.o, 9); s.x0.o ^= ROTR32(t0.e, 10); \
s.x1.e ^= ROTR32(t1.o, 19); s.x1.o ^= ROTR32(t1.e, 20); \
s.x2.e ^= t2.o; s.x2.o ^= ROTR32(t2.e, 1); \
s.x3.e ^= ROTR32(t3.e, 5); s.x3.o ^= ROTR32(t3.o, 5); \
s.x4.e ^= ROTR32(t4.o, 3); s.x4.o ^= ROTR32(t4.e, 4); \
s.x2.e = ~s.x2.e; s.x2.o = ~s.x2.o; \
} while(0)
/* clang-format on */
// Applies the rounds with indices START_ROUND(rounds) .. 11 to the state at p.
void P(state *p, u8 rounds);
#endif // PERMUTATIONS_H_
// Size parameters of this AEAD implementation, in bytes.
// Key length.
#define CRYPTO_KEYBYTES 16
// Secret message number length (the nsec arguments of the encrypt/decrypt
// entry points are ignored).
#define CRYPTO_NSECBYTES 0
// Public nonce length.
#define CRYPTO_NPUBBYTES 16
// Length of the authentication tag appended to the ciphertext.
#define CRYPTO_ABYTES 16
// NOTE(review): presumably tells callers that input and output buffers must
// not overlap - confirm against the API convention this file follows.
#define CRYPTO_NOOVERLAP 1
#include <string.h>
#include "core.h"
/*
 * Absorbs `len` bytes from `in` into the rate words x0/x1 of the state.
 *
 * s    - cipher state, updated in place
 * out  - receives `len` bytes when mode is ASCON_ENC or ASCON_DEC; it is
 *        neither written nor advanced when mode is ASCON_AD, so a null
 *        pointer is fine there
 * in   - input bytes
 * len  - number of input bytes
 * mode - ASCON_AD:  XOR the input into the rate only
 *        ASCON_ENC: XOR the input into the rate and output the rate
 *        ASCON_DEC: as ASCON_ENC, then overwrite the rate with the input
 *
 * Every full RATE-byte block is followed by P(s, PB_ROUNDS). The trailing
 * partial block (0..RATE-1 bytes) is padded with 0x80 followed by zeros and
 * is NOT followed by a permutation; that is left to the caller.
 *
 * Bytes travel between the caller's buffers and a local u64 pair through
 * memcpy, so `in` and `out` need no particular alignment.
 */
void process_data(state* s, unsigned char* out, const unsigned char* in,
                  unsigned long long len, u8 mode) {
  u32_2 t0, t1;
  u64 words[2];            // one rate block as two native 64-bit words
  u8* bytes = (u8*)words;  // byte view of the same block

  while (len >= RATE) {
    memcpy(words, in, RATE);
    t0 = to_bit_interleaving_big(words[0]);
    s->x0.e ^= t0.e;
    s->x0.o ^= t0.o;
    t1 = to_bit_interleaving_big(words[1]);
    s->x1.e ^= t1.e;
    s->x1.o ^= t1.o;
    if (mode != ASCON_AD) {
      words[0] = from_bit_interleaving_big(s->x0);
      words[1] = from_bit_interleaving_big(s->x1);
      memcpy(out, words, RATE);
      // only advance a pointer that is actually in use (out is null for AD)
      out += RATE;
    }
    if (mode == ASCON_DEC) {
      s->x0 = t0;
      s->x1 = t1;
    }
    P(s, PB_ROUNDS);
    in += RATE;
    len -= RATE;
  }

  // final block: remaining bytes, then 0x80, then zeros
  memset(bytes, 0, RATE);
  if (len) {
    memcpy(bytes, in, len);
  }
  bytes[len] ^= 0x80;
  t0 = to_bit_interleaving_big(words[0]);
  s->x0.e ^= t0.e;
  s->x0.o ^= t0.o;
  t1 = to_bit_interleaving_big(words[1]);
  s->x1.e ^= t1.e;
  s->x1.o ^= t1.o;
  if (mode != ASCON_AD) {
    words[0] = from_bit_interleaving_big(s->x0);
    words[1] = from_bit_interleaving_big(s->x1);
    if (len) {
      memcpy(out, bytes, len);
    }
  }
  if (mode == ASCON_DEC) {
    // the first len rate bytes become the input bytes; the tail keeps the
    // value computed above
    if (len) {
      memcpy(bytes, in, len);
    }
    s->x0 = to_bit_interleaving_big(words[0]);
    s->x1 = to_bit_interleaving_big(words[1]);
  }
}
/*
 * Runs the whole cipher on one message and leaves the final state in *s.
 *
 * s     - state; fully overwritten during initialization
 * out   - output buffer of tlen bytes, passed on to process_data()
 * in    - tlen bytes of plaintext (ASCON_ENC) or ciphertext (ASCON_DEC)
 * ad    - adlen bytes of associated data; not read when adlen is 0
 * npub  - 16-byte nonce
 * k     - 16-byte key
 * mode  - ASCON_ENC or ASCON_DEC
 *
 * On return x3/x4 of the state hold the tag words; crypto_aead_encrypt()
 * and crypto_aead_decrypt() read them from there.
 *
 * Key and nonce are copied with memcpy before conversion, so k and npub
 * need no particular alignment.
 */
void ascon_core(state* s, unsigned char* out, const unsigned char* in,
                unsigned long long tlen, const unsigned char* ad,
                unsigned long long adlen, const unsigned char* npub,
                const unsigned char* k, u8 mode) {
  u32_2 K0, K1, N0, N1;
  u64 words[2];
  // load key and nonce
  memcpy(words, k, sizeof words);
  K0 = to_bit_interleaving_big(words[0]);
  K1 = to_bit_interleaving_big(words[1]);
  memcpy(words, npub, sizeof words);
  N0 = to_bit_interleaving_big(words[0]);
  N1 = to_bit_interleaving_big(words[1]);
  // initialization
  to_bit_interleaving_big_immediate(s->x0, IV);
  s->x1.o = K0.o;
  s->x1.e = K0.e;
  s->x2.e = K1.e;
  s->x2.o = K1.o;
  s->x3.e = N0.e;
  s->x3.o = N0.o;
  s->x4.e = N1.e;
  s->x4.o = N1.o;
  P(s, PA_ROUNDS);
  s->x3.e ^= K0.e;
  s->x3.o ^= K0.o;
  s->x4.e ^= K1.e;
  s->x4.o ^= K1.o;
  // process associated data
  if (adlen) {
    // process_data() does not permute after the final padded block
    process_data(s, (void*)0, ad, adlen, ASCON_AD);
    P(s, PB_ROUNDS);
  }
  // flip bit 0 of x4.e before the message is processed
  s->x4.e ^= 1;
  // process plaintext/ciphertext
  process_data(s, out, in, tlen, mode);
  // finalization
  s->x2.e ^= K0.e;
  s->x2.o ^= K0.o;
  s->x3.e ^= K1.e;
  s->x3.o ^= K1.o;
  P(s, PA_ROUNDS);
  s->x3.e ^= K0.e;
  s->x3.o ^= K0.o;
  s->x4.e ^= K1.e;
  s->x4.o ^= K1.o;
}
#ifndef CORE_H_
#define CORE_H_
#include "api.h"
#include "endian.h"
#include "permutations.h"
// Values for the `mode` argument of process_data() / ascon_core().
#define ASCON_AD 0
#define ASCON_ENC 1
#define ASCON_DEC 2
// Rate in bytes: the number of bytes absorbed per block.
#define RATE (128 / 8)
// Round counts handed to P(): PA_ROUNDS for initialization and
// finalization, PB_ROUNDS between data blocks.
#define PA_ROUNDS 12
#define PB_ROUNDS 8
// Initial value of state word x0: key size in bits, rate in bits, PA_ROUNDS
// and PB_ROUNDS, one byte each, followed by four zero bytes.
// to_bit_interleaving_big_immediate() applies U64BIG to its input, so the
// word is written in big-endian form and wrapped in U64BIG here. The two
// swaps cancel, which makes the initial state the same on little- and
// big-endian hosts; on little-endian hosts the value equals the previous
// definition.
#define IV                                                                 \
  U64BIG(((u64)(8 * (CRYPTO_KEYBYTES)) << 56 | (u64)(8 * (RATE)) << 48 |   \
          (u64)(PA_ROUNDS) << 40 | (u64)(PB_ROUNDS) << 32))
// Absorbs len bytes of `in` into the state; writes len bytes to `out`
// unless mode is ASCON_AD.
void process_data(state* s, unsigned char* out, const unsigned char* in,
                  unsigned long long len, u8 mode);
// Runs initialization, associated data, message processing and finalization;
// the tag words are left in x3/x4 of *s.
void ascon_core(state* s, unsigned char* out, const unsigned char* in,
                unsigned long long tlen, const unsigned char* ad,
                unsigned long long adlen, const unsigned char* npub,
                const unsigned char* k, u8 mode);
#endif  // CORE_H_
#include "core.h"
int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen,
unsigned char* nsec, const unsigned char* c,
unsigned long long clen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k) {
if (clen < CRYPTO_ABYTES) {
*mlen = 0;
return -1;
}
state s;
u32_2 t0, t1;
(void)nsec;
// set plaintext size
*mlen = clen - CRYPTO_ABYTES;
ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DEC);
// verify tag (should be constant time, check compiler output)
t0 = to_bit_interleaving_big((*(u64*)(c + *mlen)));
t1 = to_bit_interleaving_big((*(u64*)(c + *mlen + 8)));
if (((s.x3.e ^ t0.e) | (s.x3.o ^ t0.o) | (s.x4.e ^ t1.e) | (s.x4.o ^ t1.o)) !=
0) {
*mlen = 0;
return -1;
}
return 0;
}
#include "core.h"
/*
 * Encrypts m[0 .. mlen) into c and appends the CRYPTO_ABYTES-byte tag, so
 * c must have room for mlen + CRYPTO_ABYTES bytes. *clen receives that
 * total. Always returns 0. nsec is ignored.
 */
int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen,
                        const unsigned char* m, unsigned long long mlen,
                        const unsigned char* ad, unsigned long long adlen,
                        const unsigned char* nsec, const unsigned char* npub,
                        const unsigned char* k) {
  state s;
  u64 tag[2];
  const unsigned char* tag_bytes = (const unsigned char*)tag;
  unsigned int i;
  (void)nsec;
  // set ciphertext size
  *clen = mlen + CRYPTO_ABYTES;
  ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENC);
  // set tag
  tag[0] = from_bit_interleaving_big(s.x3);
  tag[1] = from_bit_interleaving_big(s.x4);
  // c + mlen has no guaranteed alignment, so the tag is stored byte by byte
  // rather than through a u64 pointer.
  for (i = 0; i < (unsigned int)sizeof tag; i++) {
    c[mlen + i] = tag_bytes[i];
  }
  return 0;
}
#ifndef ENDIAN_H_
#define ENDIAN_H_
// U64BIG / U32BIG / U16BIG convert a value between the host's native byte
// order and big-endian byte order. The build fails with #error when the
// byte order cannot be determined.
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
// macros for big endian machines
// (the conversions are the identity)
#define U64BIG(x) (x)
#define U32BIG(x) (x)
#define U16BIG(x) (x)
#elif defined(_MSC_VER) || \
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
// macros for little endian machines
// (byte reversal; the argument x is expanded once per byte, so it should be
// free of side effects; _MSC_VER builds are treated as little endian)
#define U64BIG(x) \
((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) | \
(((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) | \
(((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) | \
(((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56))
#define U32BIG(x) \
((((x)&0x000000FF) << 24) | (((x)&0x0000FF00) << 8) | \
(((x)&0x00FF0000) >> 8) | (((x)&0xFF000000) >> 24))
#define U16BIG(x) ((((x)&0x00FF) << 8) | (((x)&0xFF00) >> 8))
#else
#error "ascon byte order macros not defined in endian.h"
#endif
#endif // ENDIAN_H_
Christoph Dobraunig
Martin Schläffer
Ferdinand Bachmann
#include "endian.h"
#include "permutations.h"
// Round constants, one pair per round index 0..11. P() passes element [0]
// as C_e and element [1] as C_o to ROUND, which XORs them into x2.e / x2.o.
static const u8 constants[][2] = {
{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9}, {0x6, 0xc}, {0x3, 0xc},
{0x6, 0x9}, {0x3, 0x9}, {0xc, 0x6}, {0x9, 0x6}, {0xc, 0x3}, {0x9, 0x3}};
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
// Function form of to_bit_interleaving_big_immediate(): converts the 64-bit
// word `in` and returns the resulting u32_2 pair by value.
u32_2 to_bit_interleaving_big(u64 in) {
  u32_2 interleaved;
  to_bit_interleaving_big_immediate(interleaved, in);
  return interleaved;
}
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
// Function form of from_bit_interleaving_big_immediate(): converts the
// u32_2 pair `in` and returns the resulting 64-bit word by value.
u64 from_bit_interleaving_big(u32_2 in) {
  u64 word;
  from_bit_interleaving_big_immediate(word, in);
  return word;
}
// Applies ROUND for round indices START_ROUND(rounds) .. 11 to the state at
// p. The state is copied into the local `s` and the scratch word is named
// `t0` because ROUND refers to both by name; the result is written back at
// the end.
void P(state *p, u8 rounds) {
  u32_2 t0;
  state s = *p;
  u32 i = START_ROUND(rounds);
  while (i < 12) {
    ROUND(constants[i][0], constants[i][1]);
    i++;
  }
  *p = s;
}
#ifndef PERMUTATIONS_H_
#define PERMUTATIONS_H_
// Fixed-size unsigned integer aliases used throughout the implementation.
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned long long u64;
// One 64-bit state word stored as two 32-bit words, .e and .o
// (to_bit_interleaving_big_immediate() produces this form).
typedef struct {
u32 e;
u32 o;
} u32_2;
// The cipher state: five 64-bit words x0..x4 in u32_2 form.
typedef struct {
u32_2 x0;
u32_2 x1;
u32_2 x2;
u32_2 x3;
u32_2 x4;
} state;
// Rotates x right by n bits; assumes x is a 32-bit unsigned value.
// NOTE(review): n must stay in 1..31 - n == 0 would shift left by 32.
#define ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
// Index of the first round to run when x of the 12 rounds are requested.
#define START_ROUND(x) (12 - (x))
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
// Function form of to_bit_interleaving_big_immediate().
u32_2 to_bit_interleaving_big(u64 in);
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
// Function form of from_bit_interleaving_big_immediate().
u64 from_bit_interleaving_big(u32_2 in);
/*
 * Converts the 64-bit value `in` into the u32_2 lvalue `out`.
 * U64BIG (endian.h) must be visible where the macro is used.
 * `in` is evaluated once; `out` may be any lvalue expression (it is
 * parenthesized) and is assigned field by field. The locals carry a bi_
 * prefix and trailing underscore so they cannot capture caller names such
 * as hi/lo.
 */
#define to_bit_interleaving_big_immediate(out, in) \
do { \
u64 bi_raw_ = (in); \
u64 bi_in_ = U64BIG(bi_raw_); \
u32 bi_hi_ = (u32)(bi_in_ >> 32); \
u32 bi_lo_ = (u32)(bi_in_); \
u32 bi_r0_, bi_r1_; \
bi_r0_ = (bi_lo_ ^ (bi_lo_ >> 1)) & 0x22222222, bi_lo_ ^= bi_r0_ ^ (bi_r0_ << 1); \
bi_r0_ = (bi_lo_ ^ (bi_lo_ >> 2)) & 0x0C0C0C0C, bi_lo_ ^= bi_r0_ ^ (bi_r0_ << 2); \
bi_r0_ = (bi_lo_ ^ (bi_lo_ >> 4)) & 0x00F000F0, bi_lo_ ^= bi_r0_ ^ (bi_r0_ << 4); \
bi_r0_ = (bi_lo_ ^ (bi_lo_ >> 8)) & 0x0000FF00, bi_lo_ ^= bi_r0_ ^ (bi_r0_ << 8); \
bi_r1_ = (bi_hi_ ^ (bi_hi_ >> 1)) & 0x22222222, bi_hi_ ^= bi_r1_ ^ (bi_r1_ << 1); \
bi_r1_ = (bi_hi_ ^ (bi_hi_ >> 2)) & 0x0C0C0C0C, bi_hi_ ^= bi_r1_ ^ (bi_r1_ << 2); \
bi_r1_ = (bi_hi_ ^ (bi_hi_ >> 4)) & 0x00F000F0, bi_hi_ ^= bi_r1_ ^ (bi_r1_ << 4); \
bi_r1_ = (bi_hi_ ^ (bi_hi_ >> 8)) & 0x0000FF00, bi_hi_ ^= bi_r1_ ^ (bi_r1_ << 8); \
(out).e = (bi_lo_ & 0x0000FFFF) | (bi_hi_ << 16); \
(out).o = (bi_lo_ >> 16) | (bi_hi_ & 0xFFFF0000); \
} while (0)
/*
 * Inverse of to_bit_interleaving_big_immediate(): converts the u32_2
 * expression `in` into the u64 lvalue `out`.
 * U64BIG (endian.h) must be visible where the macro is used.
 * `in` is read four times and should be free of side effects; `out` is
 * assigned exactly once.
 */
#define from_bit_interleaving_big_immediate(out, in) \
do { \
u32 bi_lo_ = ((in).e & 0x0000FFFF) | ((in).o << 16); \
u32 bi_hi_ = ((in).e >> 16) | ((in).o & 0xFFFF0000); \
u32 bi_r0_, bi_r1_; \
u64 bi_out_; \
bi_r0_ = (bi_lo_ ^ (bi_lo_ >> 8)) & 0x0000FF00, bi_lo_ ^= bi_r0_ ^ (bi_r0_ << 8); \
bi_r0_ = (bi_lo_ ^ (bi_lo_ >> 4)) & 0x00F000F0, bi_lo_ ^= bi_r0_ ^ (bi_r0_ << 4); \
bi_r0_ = (bi_lo_ ^ (bi_lo_ >> 2)) & 0x0C0C0C0C, bi_lo_ ^= bi_r0_ ^ (bi_r0_ << 2); \
bi_r0_ = (bi_lo_ ^ (bi_lo_ >> 1)) & 0x22222222, bi_lo_ ^= bi_r0_ ^ (bi_r0_ << 1); \
bi_r1_ = (bi_hi_ ^ (bi_hi_ >> 8)) & 0x0000FF00, bi_hi_ ^= bi_r1_ ^ (bi_r1_ << 8); \
bi_r1_ = (bi_hi_ ^ (bi_hi_ >> 4)) & 0x00F000F0, bi_hi_ ^= bi_r1_ ^ (bi_r1_ << 4); \
bi_r1_ = (bi_hi_ ^ (bi_hi_ >> 2)) & 0x0C0C0C0C, bi_hi_ ^= bi_r1_ ^ (bi_r1_ << 2); \
bi_r1_ = (bi_hi_ ^ (bi_hi_ >> 1)) & 0x22222222, bi_hi_ ^= bi_r1_ ^ (bi_r1_ << 1); \
bi_out_ = (u64)bi_hi_ << 32 | bi_lo_; \
(out) = U64BIG(bi_out_); \
} while (0)
/* clang-format off */
/*
 * One permutation round. The macro operates on a `state` variable that must
 * be named `s` in the enclosing scope and needs a single u32_2 temporary
 * named `t0` declared there as well. C_e and C_o are XORed into s.x2.e and
 * s.x2.o as the round constant; the s-box layer and the linear layer follow,
 * and s.x2 is complemented at the end.
 */
#define ROUND(C_e, C_o) \
do { \
/* round constant */ \
s.x2.e ^= C_e; s.x2.o ^= C_o; \
/* s-box layer */ \
s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \
s.x4.e ^= s.x3.e; s.x4.o ^= s.x3.o; \
s.x2.e ^= s.x1.e; s.x2.o ^= s.x1.o; \
t0.e = s.x0.e & (~s.x4.e); t0.o = s.x0.o & (~s.x4.o); \
s.x0.e ^= s.x2.e & (~s.x1.e); s.x0.o ^= s.x2.o & (~s.x1.o); \
s.x2.e ^= s.x4.e & (~s.x3.e); s.x2.o ^= s.x4.o & (~s.x3.o); \
s.x4.e ^= s.x1.e & (~s.x0.e); s.x4.o ^= s.x1.o & (~s.x0.o); \
s.x1.e ^= s.x3.e & (~s.x2.e); s.x1.o ^= s.x3.o & (~s.x2.o); \
s.x3.e ^= t0.e; s.x3.o ^= t0.o; \
s.x1.e ^= s.x0.e; s.x1.o ^= s.x0.o; \
s.x3.e ^= s.x2.e; s.x3.o ^= s.x2.o; \
s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \
/* linear layer */ \
t0.e = s.x0.e ^ ROTR32(s.x0.o, 4); \
t0.o = s.x0.o ^ ROTR32(s.x0.e, 5); \
s.x0.e ^= ROTR32(t0.o, 9); \
s.x0.o ^= ROTR32(t0.e, 10); \
t0.e = s.x1.e ^ ROTR32(s.x1.e, 11); \
t0.o = s.x1.o ^ ROTR32(s.x1.o, 11); \
s.x1.e ^= ROTR32(t0.o, 19); \
s.x1.o ^= ROTR32(t0.e, 20); \
t0.e = s.x2.e ^ ROTR32(s.x2.o, 2); \
t0.o = s.x2.o ^ ROTR32(s.x2.e, 3); \
s.x2.e ^= t0.o; \
s.x2.o ^= ROTR32(t0.e, 1); \
t0.e = s.x3.e ^ ROTR32(s.x3.o, 3); \
t0.o = s.x3.o ^ ROTR32(s.x3.e, 4); \
s.x3.e ^= ROTR32(t0.e, 5); \
s.x3.o ^= ROTR32(t0.o, 5); \
t0.e = s.x4.e ^ ROTR32(s.x4.e, 17); \
t0.o = s.x4.o ^ ROTR32(s.x4.o, 17); \
s.x4.e ^= ROTR32(t0.o, 3); \
s.x4.o ^= ROTR32(t0.e, 4); \
s.x2.e = ~s.x2.e; s.x2.o = ~s.x2.o; \
} while(0)
/* clang-format on */
// Applies the rounds with indices START_ROUND(rounds) .. 11 to the state at p.
void P(state *p, u8 rounds);
#endif // PERMUTATIONS_H_
// Size parameters of this AEAD implementation, in bytes.
// Key length.
#define CRYPTO_KEYBYTES 16
// Secret message number length (the nsec arguments of the encrypt/decrypt
// entry points are ignored).
#define CRYPTO_NSECBYTES 0
// Public nonce length.
#define CRYPTO_NPUBBYTES 16
// Length of the authentication tag appended to the ciphertext.
#define CRYPTO_ABYTES 16
// NOTE(review): presumably tells callers that input and output buffers must
// not overlap - confirm against the API convention this file follows.
#define CRYPTO_NOOVERLAP 1
#include <string.h>
#include "core.h"
/*
 * Absorbs `len` bytes from `in` into the rate words x0/x1 of the state.
 *
 * s    - cipher state, updated in place
 * out  - receives `len` bytes when mode is ASCON_ENC or ASCON_DEC; it is
 *        neither written nor advanced when mode is ASCON_AD, so a null
 *        pointer is fine there
 * in   - input bytes
 * len  - number of input bytes
 * mode - ASCON_AD:  XOR the input into the rate only
 *        ASCON_ENC: XOR the input into the rate and output the rate
 *        ASCON_DEC: as ASCON_ENC, then overwrite the rate with the input
 *
 * Every full RATE-byte block is followed by P(s, START_ROUND(PB_ROUNDS)).
 * The trailing partial block (0..RATE-1 bytes) is padded with 0x80 followed
 * by zeros and is NOT followed by a permutation; that is left to the caller.
 *
 * Bytes travel between the caller's buffers and a local u64 pair through
 * memcpy, so `in` and `out` need no particular alignment.
 */
void process_data(state* s, unsigned char* out, const unsigned char* in,
                  unsigned long long len, u8 mode) {
  u32_2 t0, t1;
  u64 words[2];            // one rate block as two native 64-bit words
  u8* bytes = (u8*)words;  // byte view of the same block

  while (len >= RATE) {
    memcpy(words, in, RATE);
    t0 = to_big(words[0]);
    s->x0.h ^= t0.h;
    s->x0.l ^= t0.l;
    t1 = to_big(words[1]);
    s->x1.h ^= t1.h;
    s->x1.l ^= t1.l;
    if (mode != ASCON_AD) {
      words[0] = from_big(s->x0);
      words[1] = from_big(s->x1);
      memcpy(out, words, RATE);
      // only advance a pointer that is actually in use (out is null for AD)
      out += RATE;
    }
    if (mode == ASCON_DEC) {
      s->x0 = t0;
      s->x1 = t1;
    }
    P(s, START_ROUND(PB_ROUNDS));
    in += RATE;
    len -= RATE;
  }

  // final block: remaining bytes, then 0x80, then zeros
  memset(bytes, 0, RATE);
  if (len) {
    memcpy(bytes, in, len);
  }
  bytes[len] ^= 0x80;
  t0 = to_big(words[0]);
  s->x0.h ^= t0.h;
  s->x0.l ^= t0.l;
  t1 = to_big(words[1]);
  s->x1.h ^= t1.h;
  s->x1.l ^= t1.l;
  if (mode != ASCON_AD) {
    words[0] = from_big(s->x0);
    words[1] = from_big(s->x1);
    if (len) {
      memcpy(out, bytes, len);
    }
  }
  if (mode == ASCON_DEC) {
    // the first len rate bytes become the input bytes; the tail keeps the
    // value computed above
    if (len) {
      memcpy(bytes, in, len);
    }
    s->x0 = to_big(words[0]);
    s->x1 = to_big(words[1]);
  }
}
/*
 * Runs the whole cipher on one message and leaves the final state in *s.
 *
 * s     - state; fully overwritten during initialization
 * out   - output buffer of tlen bytes, passed on to process_data()
 * in    - tlen bytes of plaintext (ASCON_ENC) or ciphertext (ASCON_DEC)
 * ad    - adlen bytes of associated data; not read when adlen is 0
 * npub  - 16-byte nonce
 * k     - 16-byte key
 * mode  - ASCON_ENC or ASCON_DEC
 *
 * On return x3/x4 of the state hold the tag words; crypto_aead_encrypt()
 * and crypto_aead_decrypt() read them from there.
 *
 * Key and nonce are copied with memcpy before conversion, so k and npub
 * need no particular alignment.
 */
void ascon_core(state* s, unsigned char* out, const unsigned char* in,
                unsigned long long tlen, const unsigned char* ad,
                unsigned long long adlen, const unsigned char* npub,
                const unsigned char* k, u8 mode) {
  u32_2 K0, K1, N0, N1;
  u64 words[2];
  // load key and nonce
  memcpy(words, k, sizeof words);
  K0 = to_big(words[0]);
  K1 = to_big(words[1]);
  memcpy(words, npub, sizeof words);
  N0 = to_big(words[0]);
  N1 = to_big(words[1]);
  // initialization
  to_big_immediate(s->x0, IV);
  s->x1.h = K0.h;
  s->x1.l = K0.l;
  s->x2.h = K1.h;
  s->x2.l = K1.l;
  s->x3.h = N0.h;
  s->x3.l = N0.l;
  s->x4.h = N1.h;
  s->x4.l = N1.l;
  P(s, START_ROUND(PA_ROUNDS));
  s->x3.h ^= K0.h;
  s->x3.l ^= K0.l;
  s->x4.h ^= K1.h;
  s->x4.l ^= K1.l;
  // process associated data
  if (adlen) {
    // process_data() does not permute after the final padded block
    process_data(s, (void*)0, ad, adlen, ASCON_AD);
    P(s, START_ROUND(PB_ROUNDS));
  }
  // flip bit 0 of x4.l before the message is processed
  s->x4.l ^= 1;
  // process plaintext/ciphertext
  process_data(s, out, in, tlen, mode);
  // finalization
  s->x2.h ^= K0.h;
  s->x2.l ^= K0.l;
  s->x3.h ^= K1.h;
  s->x3.l ^= K1.l;
  P(s, START_ROUND(PA_ROUNDS));
  s->x3.h ^= K0.h;
  s->x3.l ^= K0.l;
  s->x4.h ^= K1.h;
  s->x4.l ^= K1.l;
}
#ifndef CORE_H_
#define CORE_H_
#include "api.h"
#include "endian.h"
#include "permutations.h"
// Values for the `mode` argument of process_data() / ascon_core().
#define ASCON_AD 0
#define ASCON_ENC 1
#define ASCON_DEC 2
// Rate in bytes: the number of bytes absorbed per block.
#define RATE (128 / 8)
// Round counts used for P(): PA_ROUNDS for initialization and finalization,
// PB_ROUNDS between data blocks.
#define PA_ROUNDS 12
#define PB_ROUNDS 8
// Initial value of state word x0: key size in bits, rate in bits, PA_ROUNDS
// and PB_ROUNDS packed one byte each, starting at bit 0.
// NOTE(review): this is a host-order constant; if the conversion applied to
// it byte-swaps only on little-endian hosts, big-endian builds start from a
// different state - confirm against to_big_immediate().
#define IV \
((u64)(8 * (CRYPTO_KEYBYTES)) << 0 | (u64)(8 * (RATE)) << 8 | \
(u64)(PA_ROUNDS) << 16 | (u64)(PB_ROUNDS) << 24)
// Absorbs len bytes of `in` into the state; writes len bytes to `out`
// unless mode is ASCON_AD.
void process_data(state* s, unsigned char* out, const unsigned char* in,
unsigned long long len, u8 mode);
// Runs initialization, associated data, message processing and finalization;
// the tag words are left in x3/x4 of *s.
void ascon_core(state* s, unsigned char* out, const unsigned char* in,
unsigned long long tlen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k, u8 mode);
#endif // CORE_H_
#include "core.h"
int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen,
unsigned char* nsec, const unsigned char* c,
unsigned long long clen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k) {
if (clen < CRYPTO_ABYTES) {
*mlen = 0;
return -1;
}
state s;
u32_2 t0, t1;
(void)nsec;
// set plaintext size
*mlen = clen - CRYPTO_ABYTES;
ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DEC);
// verify tag (should be constant time, check compiler output)
t0 = to_big((*(u64*)(c + *mlen)));
t1 = to_big((*(u64*)(c + *mlen + 8)));
if (((s.x3.h ^ t0.h) | (s.x3.l ^ t0.l) | (s.x4.h ^ t1.h) | (s.x4.l ^ t1.l)) !=
0) {
*mlen = 0;
return -1;
}
return 0;
}
#include "core.h"
// Writes the eight bytes of `word` to `dst` exactly as they sit in memory.
// Done byte-wise because `dst` is at an arbitrary offset inside the caller's
// ciphertext buffer, so it may not be 8-byte aligned.
static void encrypt_store_u64(unsigned char* dst, u64 word) {
  const unsigned char* src = (const unsigned char*)&word;
  unsigned int i;
  for (i = 0; i < sizeof word; i++) {
    dst[i] = src[i];
  }
}

// Encrypts `mlen` bytes of `m` into `c` and appends the CRYPTO_ABYTES tag,
// so `c` must have room for mlen + CRYPTO_ABYTES bytes. *clen receives that
// total. Always returns 0. `nsec` is unused.
int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen,
                        const unsigned char* m, unsigned long long mlen,
                        const unsigned char* ad, unsigned long long adlen,
                        const unsigned char* nsec, const unsigned char* npub,
                        const unsigned char* k) {
  state s;
  (void)nsec;
  // set ciphertext size
  *clen = mlen + CRYPTO_ABYTES;
  ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENC);
  // set tag: x3 and x4 hold the two tag words after ascon_core
  encrypt_store_u64(c + mlen, from_big(s.x3));
  encrypt_store_u64(c + mlen + 8, from_big(s.x4));
  return 0;
}
// Byte-order helpers: UxxBIG(x) converts between host order and big-endian
// order (the same macro works in both directions).
// The little-endian versions expand their argument several times, so the
// argument must not have side effects.
#ifndef ENDIAN_H_
#define ENDIAN_H_
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
// macros for big endian machines
// (host order already is big-endian, so these are the identity)
#define U64BIG(x) (x)
#define U32BIG(x) (x)
#define U16BIG(x) (x)
#elif defined(_MSC_VER) || \
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
// macros for little endian machines
// (each macro reverses the byte order of its argument)
#define U64BIG(x) \
((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) | \
(((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) | \
(((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) | \
(((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56))
#define U32BIG(x) \
((((x)&0x000000FF) << 24) | (((x)&0x0000FF00) << 8) | \
(((x)&0x00FF0000) >> 8) | (((x)&0xFF000000) >> 24))
#define U16BIG(x) ((((x)&0x00FF) << 8) | (((x)&0xFF00) >> 8))
#else
// Neither branch matched: refuse to build rather than guess the byte order.
#error "ascon byte order macros not defined in endian.h"
#endif
#endif // ENDIAN_H_
Christoph Dobraunig
Martin Schläffer
Ferdinand Bachmann
#include "endian.h"
#include "permutations.h"
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
// Function form of to_big_immediate: turns a u64 that was loaded from a byte
// buffer into its {h, l} pair of 32-bit words.
u32_2 to_big(u64 in) {
  u32_2 pair;
  to_big_immediate(pair, in);
  return pair;
}
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
// Function form of from_big_immediate: rebuilds, from an {h, l} pair, the u64
// that can be written back to a byte buffer.
u64 from_big(u32_2 in) {
  u64 word;
  from_big_immediate(word, in);
  return word;
}
/*
void P(state *p, u8 start_round) {
// implemented in asm
}
*/
// Types and helpers shared by the AEAD core and the permutation P.
#ifndef PERMUTATIONS_H_
#define PERMUTATIONS_H_
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned long long u64;
// One 64-bit state word kept as two 32-bit halves: h = upper 32 bits,
// l = lower 32 bits (see to_big_immediate).
typedef struct {
u32 h;
u32 l;
} u32_2;
// The permutation state: five 64-bit words, 40 bytes in total.
typedef struct {
u32_2 x0;
u32_2 x1;
u32_2 x2;
u32_2 x3;
u32_2 x4;
} state;
// Index of the first round to execute when x of the 12 rounds are wanted.
#define START_ROUND(x) (12 - (x))
u32_2 to_big(u64 in);
u64 from_big(u32_2 in);
// Splits U64BIG(in) into out.h (upper half) and out.l (lower half).
// `out` must be an lvalue of type u32_2; the macro declares locals named
// big_in, hi and lo, so the arguments must not use those names.
#define to_big_immediate(out, in) \
do { \
u64 big_in = U64BIG(in); \
u32 hi = (big_in) >> 32; \
u32 lo = (u32)(big_in); \
out.h = hi; \
out.l = lo; \
} while (0)
// Inverse of to_big_immediate: out = U64BIG(in.h:in.l). `out` is expanded
// several times and must be a plain u64 lvalue.
#define from_big_immediate(out, in) \
do { \
u32 hi = in.h; \
u32 lo = in.l; \
out = (u64)hi << 32 | lo; \
out = U64BIG(out); \
} while (0)
// Applies the permutation to *p in place. NOTE(review): the parameter is
// named `rounds`, but the callers in this variant pass START_ROUND(n), i.e.
// the index of the first round, which is how the assembly implementation
// uses it.
void P(state *p, u8 rounds);
#endif // PERMUTATIONS_H_
#include <xtensa/coreasm.h>
/*
 * Register aliases. a2 carries the state pointer on entry and is reused as
 * tmp0 after the pointer has been spilled to the stack; a3 carries the round
 * index. a4..a13 hold the five state words as high/low 32-bit halves.
 * a14/a15 are scratch and are known under two pairs of names
 * (t0h/t0l in the linear layer, tmp1/tmp2 in the s-box layer).
 */
#define tmp0 a2
#define rnd a3
#define x0h a4
#define x0l a5
#define x1h a6
#define x1l a7
#define x2h a8
#define x2l a9
#define x3h a10
#define x3l a11
#define x4h a12
#define x4l a13
#define t0h a14
#define t0l a15
#define tmp1 a14
#define tmp2 a15
.section .text
.align 4
.global P
.type P,@function
/*
 * P: applies permutation rounds rnd..11 to the 40-byte state at a2.
 * Assumes the C arguments (state *, first round index) arrive in a2 and a3
 * -- TODO confirm against the ABI selected by abi_entry.
 */
P:
/* abi_entry comes from xtensa/coreasm.h; presumably it sets up a frame with
   4 bytes of local storage, used below to spill the state pointer */
abi_entry 4
/* load the ten state halves: x0h at offset 0, x0l at 4, ... x4l at 36 */
l32i x0h, a2, 0
l32i x0l, a2, 4
l32i x1h, a2, 8
l32i x1l, a2, 12
l32i x2h, a2, 16
l32i x2l, a2, 20
l32i x3h, a2, 24
l32i x3l, a2, 28
l32i x4h, a2, 32
l32i x4l, a2, 36
/* save the state pointer; a2 is about to be clobbered as tmp0 */
s32i a2, a1, 0
.Lround_start:
/* round constant */
/* x2l ^= ((15 - rnd) << 4) | rnd */
movi tmp0, 15
sub tmp0, tmp0, rnd
slli tmp0, tmp0, 4
or tmp0, tmp0, rnd
xor x2l, x2l, tmp0
/* s-box layer */
/* tmp0 = all ones, so "xor r, tmp0, x" computes ~x */
movi tmp0, -1
/* high */
xor x0h, x0h, x4h
xor x4h, x4h, x3h
xor x2h, x2h, x1h
/* tmp2 = x0 & ~x4, kept until it is folded into x3 below */
xor tmp1, tmp0, x4h
and tmp2, tmp1, x0h
/* x0 ^= x2 & ~x1 */
xor tmp1, tmp0, x1h
and tmp1, tmp1, x2h
xor x0h, x0h, tmp1
/* x2 ^= x4 & ~x3 */
xor tmp1, tmp0, x3h
and tmp1, tmp1, x4h
xor x2h, x2h, tmp1
/* x4 ^= x1 & ~x0 */
xor tmp1, tmp0, x0h
and tmp1, tmp1, x1h
xor x4h, x4h, tmp1
/* x1 ^= x3 & ~x2 */
xor tmp1, tmp0, x2h
and tmp1, tmp1, x3h
xor x1h, x1h, tmp1
xor x3h, x3h, tmp2
xor x1h, x1h, x0h
xor x3h, x3h, x2h
xor x0h, x0h, x4h
/* x2 = ~x2 */
xor x2h, x2h, tmp0
/* low */
/* same sequence as above, applied to the low halves */
xor x0l, x0l, x4l
xor x4l, x4l, x3l
xor x2l, x2l, x1l
xor tmp1, tmp0, x4l
and tmp2, tmp1, x0l
xor tmp1, tmp0, x1l
and tmp1, tmp1, x2l
xor x0l, x0l, tmp1
xor tmp1, tmp0, x3l
and tmp1, tmp1, x4l
xor x2l, x2l, tmp1
xor tmp1, tmp0, x0l
and tmp1, tmp1, x1l
xor x4l, x4l, tmp1
xor tmp1, tmp0, x2l
and tmp1, tmp1, x3l
xor x1l, x1l, tmp1
xor x3l, x3l, tmp2
xor x1l, x1l, x0l
xor x3l, x3l, x2l
xor x0l, x0l, x4l
xor x2l, x2l, tmp0
/* linear layer */
/*
 * Each word becomes x ^ rotr64(x, r1) ^ rotr64(x, r2). ssai sets the shift
 * amount and src funnel-shifts a register pair right by it; for rotation
 * amounts of 32 or more the halves are passed swapped and the amount is
 * reduced by 32 (the "inverted" cases). tmp0 is free again here and serves
 * as a temporary for the new low half.
 */
/* x0: rotations 19 and 28 */
ssai 19
src t0l, x0h, x0l
src t0h, x0l, x0h
xor t0l, t0l, x0l
xor t0h, t0h, x0h
ssai 28
src tmp0, x0h, x0l
src x0h, x0l, x0h
xor x0l, tmp0, t0l
xor x0h, x0h, t0h
/* x1: rotations 61 and 39 */
ssai 29 /* inverted 61 */
src t0l, x1l, x1h
src t0h, x1h, x1l
xor t0l, t0l, x1l
xor t0h, t0h, x1h
ssai 7 /* inverted 39 */
src tmp0, x1l, x1h
src x1h, x1h, x1l
xor x1l, tmp0, t0l
xor x1h, x1h, t0h
/* x2: rotations 1 and 6 */
ssai 1
src t0l, x2h, x2l
src t0h, x2l, x2h
xor t0l, t0l, x2l
xor t0h, t0h, x2h
ssai 6
src tmp0, x2h, x2l
src x2h, x2l, x2h
xor x2l, tmp0, t0l
xor x2h, x2h, t0h
/* x3: rotations 10 and 17 */
ssai 10
src t0l, x3h, x3l
src t0h, x3l, x3h
xor t0l, t0l, x3l
xor t0h, t0h, x3h
ssai 17
src tmp0, x3h, x3l
src x3h, x3l, x3h
xor x3l, tmp0, t0l
xor x3h, x3h, t0h
/* x4: rotations 7 and 41 */
ssai 7
src t0l, x4h, x4l
src t0h, x4l, x4h
xor t0l, t0l, x4l
xor t0h, t0h, x4h
ssai 9 /* inverted 41 */
src tmp0, x4l, x4h
src x4h, x4h, x4l
xor x4l, tmp0, t0l
xor x4h, x4h, t0h
/* loop */
/* advance the round index and repeat while it is below 12 */
addi rnd, rnd, 1
bltui rnd, 12, .Lround_start
.Lend:
/* reload the spilled state pointer and write all ten halves back */
l32i a2, a1, 0
s32i x0h, a2, 0
s32i x0l, a2, 4
s32i x1h, a2, 8
s32i x1l, a2, 12
s32i x2h, a2, 16
s32i x2l, a2, 20
s32i x3h, a2, 24
s32i x3l, a2, 28
s32i x4h, a2, 32
s32i x4l, a2, 36
abi_return
// Sizes, in bytes, of the buffers exchanged through the crypto_aead_* API.
// Key length; also enters the IV as a bit count.
#define CRYPTO_KEYBYTES 16
// No secret message number is used (the nsec argument is ignored).
#define CRYPTO_NSECBYTES 0
// Public nonce length.
#define CRYPTO_NPUBBYTES 16
// Tag length appended to the ciphertext.
#define CRYPTO_ABYTES 16
// NOTE(review): presumably tells the benchmarking framework that input and
// output buffers must not overlap; it is not referenced by the code here.
#define CRYPTO_NOOVERLAP 1
#include <string.h>
#include "core.h"
// Absorbs `len` bytes from `in` into state word x0, RATE bytes at a time,
// running P with PB_ROUNDS after every full block. The remaining
// 0..RATE-1 bytes are padded with 0x80 followed by zeros and absorbed
// without a trailing permutation.
//
// mode == ASCON_AD : only absorbs; `out` is never touched (may be null).
// mode == ASCON_ENC: `in` is plaintext, ciphertext is written to `out`.
// mode == ASCON_DEC: `in` is ciphertext, plaintext is written to `out`, and
//                    the rate word is replaced by the ciphertext.
//
// All buffer accesses go through memcpy because `in` and `out` are plain
// byte pointers without any alignment guarantee.
void process_data(state* s, unsigned char* out, const unsigned char* in,
                  unsigned long long len, u8 mode) {
  u32_2 t0;
  u64 tmp0;
  // full blocks
  while (len >= RATE) {
    memcpy(&tmp0, in, sizeof tmp0);
    t0 = to_bit_interleaving_big(tmp0);
    s->x0.e ^= t0.e;
    s->x0.o ^= t0.o;
    if (mode != ASCON_AD) {
      tmp0 = from_bit_interleaving_big(s->x0);
      memcpy(out, &tmp0, sizeof tmp0);
      // advanced only here so a null `out` is never used in arithmetic
      out += RATE;
    }
    if (mode == ASCON_DEC) {
      s->x0 = t0;
    }
    P(s, START_ROUND(PB_ROUNDS));
    in += RATE;
    len -= RATE;
  }
  // final block: len < RATE input bytes, then 0x80, then zeros
  u8 bytes[8];
  memcpy(bytes, in, len);
  memset(bytes + len, 0, 8 - len);
  bytes[len] ^= 0x80;
  memcpy(&tmp0, bytes, sizeof tmp0);
  t0 = to_bit_interleaving_big(tmp0);
  s->x0.e ^= t0.e;
  s->x0.o ^= t0.o;
  if (mode != ASCON_AD) {
    // serialize the rate word; only its first len bytes are output
    tmp0 = from_bit_interleaving_big(s->x0);
    memcpy(bytes, &tmp0, sizeof tmp0);
    memcpy(out, bytes, len);
  }
  if (mode == ASCON_DEC) {
    // overwrite the first len rate bytes with the ciphertext and keep the
    // remaining (padded) state bytes
    memcpy(bytes, in, len);
    memcpy(&tmp0, bytes, sizeof tmp0);
    s->x0 = to_bit_interleaving_big(tmp0);
  }
}
// Runs the shared part of encryption/decryption on state `s`: loads key and
// nonce, initializes the state, absorbs the associated data (if any),
// processes `tlen` bytes from `in` into `out` according to `mode`
// (ASCON_ENC or ASCON_DEC), and finalizes. On return the tag words are left
// in s->x3 and s->x4 for the caller to emit or compare.
//
// `k` and `npub` must each point to 16 readable bytes.
void ascon_core(state* s, unsigned char* out, const unsigned char* in,
                unsigned long long tlen, const unsigned char* ad,
                unsigned long long adlen, const unsigned char* npub,
                const unsigned char* k, u8 mode) {
  u32_2 K0, K1, N0, N1;
  u64 word;
  // load key and nonce
  // memcpy is used instead of *(u64*)ptr because the caller's byte buffers
  // carry no 8-byte alignment guarantee and are not u64 objects.
  memcpy(&word, k, sizeof word);
  K0 = to_bit_interleaving_big(word);
  memcpy(&word, k + 8, sizeof word);
  K1 = to_bit_interleaving_big(word);
  memcpy(&word, npub, sizeof word);
  N0 = to_bit_interleaving_big(word);
  memcpy(&word, npub + 8, sizeof word);
  N1 = to_bit_interleaving_big(word);
  // initialization: state = IV || K || N
  to_bit_interleaving_big_immediate(s->x0, IV);
  s->x1.o = K0.o;
  s->x1.e = K0.e;
  s->x2.e = K1.e;
  s->x2.o = K1.o;
  s->x3.e = N0.e;
  s->x3.o = N0.o;
  s->x4.e = N1.e;
  s->x4.o = N1.o;
  P(s, START_ROUND(PA_ROUNDS));
  s->x3.e ^= K0.e;
  s->x3.o ^= K0.o;
  s->x4.e ^= K1.e;
  s->x4.o ^= K1.o;
  // process associated data (skipped entirely when adlen == 0)
  if (adlen) {
    // out is unused in ASCON_AD mode, so a null pointer is passed
    process_data(s, (void*)0, ad, adlen, ASCON_AD);
    P(s, START_ROUND(PB_ROUNDS));
  }
  // flip bit 0 of the even half of x4 before the message is processed
  s->x4.e ^= 1;
  // process plaintext/ciphertext
  process_data(s, out, in, tlen, mode);
  // finalization: with the 8-byte rate the key is added to x1 and x2
  s->x1.e ^= K0.e;
  s->x1.o ^= K0.o;
  s->x2.e ^= K1.e;
  s->x2.o ^= K1.o;
  P(s, START_ROUND(PA_ROUNDS));
  s->x3.e ^= K0.e;
  s->x3.o ^= K0.o;
  s->x4.e ^= K1.e;
  s->x4.o ^= K1.o;
}
// Shared declarations for the AEAD core: mode selectors, scheme parameters
// and the two functions implemented in the core source file.
#ifndef CORE_H_
#define CORE_H_
#include "api.h"
#include "endian.h"
#include "permutations.h"
// Values for the `mode` argument of process_data() and ascon_core().
// ASCON_AD only absorbs input; ASCON_ENC and ASCON_DEC also write output.
#define ASCON_AD 0
#define ASCON_ENC 1
#define ASCON_DEC 2
// Number of bytes absorbed per block (8 in this variant).
#define RATE (64 / 8)
// Round counts: PA_ROUNDS for initialization/finalization, PB_ROUNDS between
// data blocks.
#define PA_ROUNDS 12
#define PB_ROUNDS 6
// Initial value of state word x0: key size in bits, rate in bits, PA_ROUNDS
// and PB_ROUNDS packed into the four low bytes of a u64.
// NOTE(review): callers pass this through to_bit_interleaving_big_immediate(),
// which applies U64BIG; the resulting word therefore depends on host byte
// order — confirm before building for a big-endian target.
#define IV \
((u64)(8 * (CRYPTO_KEYBYTES)) << 0 | (u64)(8 * (RATE)) << 8 | \
(u64)(PA_ROUNDS) << 16 | (u64)(PB_ROUNDS) << 24)
// Absorbs `len` bytes of `in` into the state; writes `len` bytes to `out`
// unless mode is ASCON_AD.
void process_data(state* s, unsigned char* out, const unsigned char* in,
unsigned long long len, u8 mode);
// Full initialization / associated data / message / finalization sequence;
// leaves the tag in s->x3 and s->x4.
void ascon_core(state* s, unsigned char* out, const unsigned char* in,
unsigned long long tlen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k, u8 mode);
#endif // CORE_H_
#include "core.h"
int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen,
unsigned char* nsec, const unsigned char* c,
unsigned long long clen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k) {
if (clen < CRYPTO_ABYTES) {
*mlen = 0;
return -1;
}
state s;
u32_2 t0, t1;
(void)nsec;
// set plaintext size
*mlen = clen - CRYPTO_ABYTES;
ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DEC);
// verify tag (should be constant time, check compiler output)
t0 = to_bit_interleaving_big((*(u64*)(c + *mlen)));
t1 = to_bit_interleaving_big((*(u64*)(c + *mlen + 8)));
if (((s.x3.e ^ t0.e) | (s.x3.o ^ t0.o) | (s.x4.e ^ t1.e) | (s.x4.o ^ t1.o)) !=
0) {
*mlen = 0;
return -1;
}
return 0;
}
#include "core.h"
// Writes the eight bytes of `word` to `dst` exactly as they sit in memory.
// Done byte-wise because `dst` is at an arbitrary offset inside the caller's
// ciphertext buffer, so it may not be 8-byte aligned.
static void encrypt_store_u64(unsigned char* dst, u64 word) {
  const unsigned char* src = (const unsigned char*)&word;
  unsigned int i;
  for (i = 0; i < sizeof word; i++) {
    dst[i] = src[i];
  }
}

// Encrypts `mlen` bytes of `m` into `c` and appends the CRYPTO_ABYTES tag,
// so `c` must have room for mlen + CRYPTO_ABYTES bytes. *clen receives that
// total. Always returns 0. `nsec` is unused.
int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen,
                        const unsigned char* m, unsigned long long mlen,
                        const unsigned char* ad, unsigned long long adlen,
                        const unsigned char* nsec, const unsigned char* npub,
                        const unsigned char* k) {
  state s;
  (void)nsec;
  // set ciphertext size
  *clen = mlen + CRYPTO_ABYTES;
  ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENC);
  // set tag: x3 and x4 hold the two tag words after ascon_core
  encrypt_store_u64(c + mlen, from_bit_interleaving_big(s.x3));
  encrypt_store_u64(c + mlen + 8, from_bit_interleaving_big(s.x4));
  return 0;
}
// Byte-order helpers: UxxBIG(x) converts between host order and big-endian
// order (the same macro works in both directions).
// The little-endian versions expand their argument several times, so the
// argument must not have side effects.
#ifndef ENDIAN_H_
#define ENDIAN_H_
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
// macros for big endian machines
// (host order already is big-endian, so these are the identity)
#define U64BIG(x) (x)
#define U32BIG(x) (x)
#define U16BIG(x) (x)
#elif defined(_MSC_VER) || \
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
// macros for little endian machines
// (each macro reverses the byte order of its argument)
#define U64BIG(x) \
((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) | \
(((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) | \
(((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) | \
(((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56))
#define U32BIG(x) \
((((x)&0x000000FF) << 24) | (((x)&0x0000FF00) << 8) | \
(((x)&0x00FF0000) >> 8) | (((x)&0xFF000000) >> 24))
#define U16BIG(x) ((((x)&0x00FF) << 8) | (((x)&0xFF00) >> 8))
#else
// Neither branch matched: refuse to build rather than guess the byte order.
#error "ascon byte order macros not defined in endian.h"
#endif
#endif // ENDIAN_H_
Christoph Dobraunig
Martin Schläffer
Ferdinand Bachmann
#include "endian.h"
#include "permutations.h"
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
// Function form of to_bit_interleaving_big_immediate: turns a u64 that was
// loaded from a byte buffer into its {e, o} pair of 32-bit words.
u32_2 to_bit_interleaving_big(u64 in) {
  u32_2 pair;
  to_bit_interleaving_big_immediate(pair, in);
  return pair;
}
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
// Function form of from_bit_interleaving_big_immediate: rebuilds, from an
// {e, o} pair, the u64 that can be written back to a byte buffer.
u64 from_bit_interleaving_big(u32_2 in) {
  u64 word;
  from_bit_interleaving_big_immediate(word, in);
  return word;
}
/*
void P(state *p, u8 start_round) {
// implemented in asm
}
*/
// Types and helpers shared by the AEAD core and the permutation P
// (bit-interleaved representation).
#ifndef PERMUTATIONS_H_
#define PERMUTATIONS_H_
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned long long u64;
// One 64-bit state word kept as two 32-bit halves named e and o; the
// conversion macros below define how a 64-bit value maps onto them.
typedef struct {
u32 e;
u32 o;
} u32_2;
// The permutation state: five 64-bit words, 40 bytes in total.
typedef struct {
u32_2 x0;
u32_2 x1;
u32_2 x2;
u32_2 x3;
u32_2 x4;
} state;
// Rotate a 32-bit value right by n bits. n must be in 1..31: n == 0 would
// shift by 32, which is undefined.
#define ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
// Index of the first round to execute when x of the 12 rounds are wanted.
#define START_ROUND(x) (12 - (x))
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
u32_2 to_bit_interleaving_big(u64 in);
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
u64 from_bit_interleaving_big(u32_2 in);
// Converts U64BIG(in) to the {e, o} form: each 32-bit half is permuted by
// four swap steps, then the low 16 bits of both halves are combined into
// out.e and the high 16 bits into out.o.
// `out` must be a u32_2 lvalue; the macro declares locals named big_in, hi,
// lo, r0 and r1, so the arguments must not use those names.
#define to_bit_interleaving_big_immediate(out, in) \
do { \
u64 big_in = U64BIG(in); \
u32 hi = (big_in) >> 32; \
u32 lo = (u32)(big_in); \
u32 r0, r1; \
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); \
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); \
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); \
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); \
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); \
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); \
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); \
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); \
out.e = (lo & 0x0000FFFF) | (hi << 16); \
out.o = (lo >> 16) | (hi & 0xFFFF0000); \
} while (0)
// Inverse of to_bit_interleaving_big_immediate: the same swap steps applied
// in the opposite order, followed by U64BIG. `out` is expanded several times
// and must be a plain u64 lvalue.
#define from_bit_interleaving_big_immediate(out, in) \
do { \
u32 lo = (in.e & 0x0000FFFF) | (in.o << 16); \
u32 hi = (in.e >> 16) | (in.o & 0xFFFF0000); \
u32 r0, r1; \
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); \
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); \
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); \
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); \
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); \
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); \
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); \
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); \
out = (u64)hi << 32 | lo; \
out = U64BIG(out); \
} while (0)
// One permutation round on the {e, o} representation. The macro operates on
// a local `state s` and a scratch `u32_2 t0` that the expanding function
// must declare under exactly those names. C_e and C_o are the round
// constants for the e and o halves of x2.
/* clang-format off */
#define ROUND(C_e, C_o) \
do { \
/* round constant */ \
s.x2.e ^= C_e; s.x2.o ^= C_o; \
/* s-box layer */ \
s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \
s.x4.e ^= s.x3.e; s.x4.o ^= s.x3.o; \
s.x2.e ^= s.x1.e; s.x2.o ^= s.x1.o; \
t0.e = s.x0.e & (~s.x4.e); t0.o = s.x0.o & (~s.x4.o); \
s.x0.e ^= s.x2.e & (~s.x1.e); s.x0.o ^= s.x2.o & (~s.x1.o); \
s.x2.e ^= s.x4.e & (~s.x3.e); s.x2.o ^= s.x4.o & (~s.x3.o); \
s.x4.e ^= s.x1.e & (~s.x0.e); s.x4.o ^= s.x1.o & (~s.x0.o); \
s.x1.e ^= s.x3.e & (~s.x2.e); s.x1.o ^= s.x3.o & (~s.x2.o); \
s.x3.e ^= t0.e; s.x3.o ^= t0.o; \
s.x1.e ^= s.x0.e; s.x1.o ^= s.x0.o; \
s.x3.e ^= s.x2.e; s.x3.o ^= s.x2.o; \
s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \
/* linear layer */ \
t0.e = s.x0.e ^ ROTR32(s.x0.o, 4); \
t0.o = s.x0.o ^ ROTR32(s.x0.e, 5); \
s.x0.e ^= ROTR32(t0.o, 9); \
s.x0.o ^= ROTR32(t0.e, 10); \
t0.e = s.x1.e ^ ROTR32(s.x1.e, 11); \
t0.o = s.x1.o ^ ROTR32(s.x1.o, 11); \
s.x1.e ^= ROTR32(t0.o, 19); \
s.x1.o ^= ROTR32(t0.e, 20); \
t0.e = s.x2.e ^ ROTR32(s.x2.o, 2); \
t0.o = s.x2.o ^ ROTR32(s.x2.e, 3); \
s.x2.e ^= t0.o; \
s.x2.o ^= ROTR32(t0.e, 1); \
t0.e = s.x3.e ^ ROTR32(s.x3.o, 3); \
t0.o = s.x3.o ^ ROTR32(s.x3.e, 4); \
s.x3.e ^= ROTR32(t0.e, 5); \
s.x3.o ^= ROTR32(t0.o, 5); \
t0.e = s.x4.e ^ ROTR32(s.x4.e, 17); \
t0.o = s.x4.o ^ ROTR32(s.x4.o, 17); \
s.x4.e ^= ROTR32(t0.o, 3); \
s.x4.o ^= ROTR32(t0.e, 4); \
s.x2.e = ~s.x2.e; s.x2.o = ~s.x2.o; \
} while(0)
/* clang-format on */
// Applies the permutation to *p in place. NOTE(review): the parameter is
// named `rounds`, but the callers in this variant pass START_ROUND(n), i.e.
// the index of the first round, which is how the assembly implementation
// uses it.
void P(state *p, u8 rounds);
#endif // PERMUTATIONS_H_
#include <xtensa/coreasm.h>
/*
 * Round-constant table: one two-byte entry per round index 0..11.
 * P XORs the first byte of an entry into x2e and the second into x2o.
 */
.section .data
.align 2
.global ascon_round_constants
.type ascon_round_constants,@object
ascon_round_constants:
.byte 0xc, 0xc
.byte 0x9, 0xc
.byte 0xc, 0x9
.byte 0x9, 0x9
.byte 0x6, 0xc
.byte 0x3, 0xc
.byte 0x6, 0x9
.byte 0x3, 0x9
.byte 0xc, 0x6
.byte 0x9, 0x6
.byte 0xc, 0x3
.byte 0x9, 0x3
/*
 * Register aliases. a4..a13 hold the five state words as e/o 32-bit halves.
 * a14/a15 are scratch and are known under two pairs of names
 * (t0e/t0o in the constant and linear steps, tmp/neg in the s-box layer).
 * a2 and a3 are used directly: a2 is the state pointer on entry and later
 * the round-constant cursor, a3 is the round index.
 */
#define x0e a4
#define x0o a5
#define x1e a6
#define x1o a7
#define x2e a8
#define x2o a9
#define x3e a10
#define x3o a11
#define x4e a12
#define x4o a13
#define t0e a14
#define t0o a15
#define tmp a14
#define neg a15
.section .text
.align 4
.global P
.type P,@function
/*
 * P: applies permutation rounds a3..11 to the 40-byte state at a2.
 * Assumes the C arguments (state *, first round index) arrive in a2 and a3
 * -- TODO confirm against the ABI selected by abi_entry.
 */
P:
/* abi_entry comes from xtensa/coreasm.h; presumably it sets up a frame with
   4 bytes of local storage, used below to spill the state pointer */
abi_entry 4
/* load the ten state halves: x0e at offset 0, x0o at 4, ... x4o at 36 */
l32i x0e, a2, 0
l32i x0o, a2, 4
l32i x1e, a2, 8
l32i x1o, a2, 12
l32i x2e, a2, 16
l32i x2o, a2, 20
l32i x3e, a2, 24
l32i x3o, a2, 28
l32i x4e, a2, 32
l32i x4o, a2, 36
/* save the state pointer, then point a2 at the table entry for round a3
   (entries are two bytes wide, hence addx2) */
s32i a2, a1, 0
movi a2, ascon_round_constants
addx2 a2, a3, a2
.Lround_start:
/* round constant */
/* a14/a15 are t0e/t0o: entry byte 0 goes into x2e, byte 1 into x2o */
l8ui t0e, a2, 0
xor x2e, x2e, a14
l8ui t0o, a2, 1
xor x2o, x2o, a15
/* s-box layer */
/* neg is reloaded with all ones before each use so that
   "xor neg, neg, x" yields ~x */
/* even */
xor x0e, x0e, x4e
xor x4e, x4e, x3e
xor x2e, x2e, x1e
/* tmp = x0 & ~x4, kept until it is folded into x3 below */
movi neg, -1
xor neg, neg, x4e
and tmp, neg, x0e
/* x0 ^= x2 & ~x1 */
movi neg, -1
xor neg, neg, x1e
and neg, neg, x2e
xor x0e, x0e, neg
/* x2 ^= x4 & ~x3 */
movi neg, -1
xor neg, neg, x3e
and neg, neg, x4e
xor x2e, x2e, neg
/* x4 ^= x1 & ~x0 */
movi neg, -1
xor neg, neg, x0e
and neg, neg, x1e
xor x4e, x4e, neg
/* x1 ^= x3 & ~x2 */
movi neg, -1
xor neg, neg, x2e
and neg, neg, x3e
xor x1e, x1e, neg
xor x3e, x3e, tmp
xor x1e, x1e, x0e
xor x3e, x3e, x2e
xor x0e, x0e, x4e
/* odd */
/* same sequence as above, applied to the odd halves */
xor x0o, x0o, x4o
xor x4o, x4o, x3o
xor x2o, x2o, x1o
movi neg, -1
xor neg, neg, x4o
and tmp, neg, x0o
movi neg, -1
xor neg, neg, x1o
and neg, neg, x2o
xor x0o, x0o, neg
movi neg, -1
xor neg, neg, x3o
and neg, neg, x4o
xor x2o, x2o, neg
movi neg, -1
xor neg, neg, x0o
and neg, neg, x1o
xor x4o, x4o, neg
movi neg, -1
xor neg, neg, x2o
and neg, neg, x3o
xor x1o, x1o, neg
xor x3o, x3o, tmp
xor x1o, x1o, x0o
xor x3o, x3o, x2o
xor x0o, x0o, x4o
/* linear layer */
/* ssai sets the shift amount; "src r, x, x" with both sources equal is a
   32-bit rotate right of x by that amount */
/* x0: t0e = x0e ^ rotr(x0o, 4), t0o = x0o ^ rotr(x0e, 5),
       x0e ^= rotr(t0o, 9), x0o ^= rotr(t0e, 10) */
ssai 4
src t0e, x0o, x0o
xor t0e, t0e, x0e
ssai 5
src t0o, x0e, x0e
xor t0o, t0o, x0o
ssai 9
src t0o, t0o, t0o
xor x0e, x0e, t0o
ssai 10
src t0e, t0e, t0e
xor x0o, x0o, t0e
/* x1: t0e = x1e ^ rotr(x1e, 11), t0o = x1o ^ rotr(x1o, 11),
       x1e ^= rotr(t0o, 19), x1o ^= rotr(t0e, 20) */
ssai 11
src t0e, x1e, x1e
xor t0e, t0e, x1e
src t0o, x1o, x1o
xor t0o, t0o, x1o
ssai 19
src t0o, t0o, t0o
xor x1e, x1e, t0o
ssai 20
src t0e, t0e, t0e
xor x1o, x1o, t0e
/* x2: t0e = x2e ^ rotr(x2o, 2), t0o = x2o ^ rotr(x2e, 3),
       x2e ^= t0o, x2o ^= rotr(t0e, 1) */
ssai 2
src t0e, x2o, x2o
xor t0e, t0e, x2e
ssai 3
src t0o, x2e, x2e
xor t0o, t0o, x2o
xor x2e, x2e, t0o
ssai 1
src t0e, t0e, t0e
xor x2o, x2o, t0e
/* x3: t0e = x3e ^ rotr(x3o, 3), t0o = x3o ^ rotr(x3e, 4),
       x3e ^= rotr(t0e, 5), x3o ^= rotr(t0o, 5) */
ssai 3
src t0e, x3o, x3o
xor t0e, t0e, x3e
ssai 4
src t0o, x3e, x3e
xor t0o, t0o, x3o
ssai 5
src t0e, t0e, t0e
xor x3e, x3e, t0e
src t0o, t0o, t0o
xor x3o, x3o, t0o
/* x4: t0e = x4e ^ rotr(x4e, 17), t0o = x4o ^ rotr(x4o, 17),
       x4e ^= rotr(t0o, 3), x4o ^= rotr(t0e, 4) */
ssai 17
src t0e, x4e, x4e
xor t0e, t0e, x4e
src t0o, x4o, x4o
xor t0o, t0o, x4o
ssai 3
src t0o, t0o, t0o
xor x4e, x4e, t0o
ssai 4
src t0e, t0e, t0e
xor x4o, x4o, t0e
/* x2 = ~x2 (deferred from the s-box layer to the end of the round) */
movi neg, -1
xor x2e, x2e, neg
xor x2o, x2o, neg
/* loop */
/* step to the next table entry and round; repeat while the index is < 12 */
addi a2, a2, 2
addi a3, a3, 1
bltui a3, 12, .Lround_start
.Lend:
/* reload the spilled state pointer and write all ten halves back */
l32i a2, a1, 0
s32i x0e, a2, 0
s32i x0o, a2, 4
s32i x1e, a2, 8
s32i x1o, a2, 12
s32i x2e, a2, 16
s32i x2o, a2, 20
s32i x3e, a2, 24
s32i x3o, a2, 28
s32i x4e, a2, 32
s32i x4o, a2, 36
abi_return
// Sizes, in bytes, of the buffers exchanged through the crypto_aead_* API.
// Key length; also enters the IV as a bit count.
#define CRYPTO_KEYBYTES 16
// No secret message number is used (the nsec argument is ignored).
#define CRYPTO_NSECBYTES 0
// Public nonce length.
#define CRYPTO_NPUBBYTES 16
// Tag length appended to the ciphertext.
#define CRYPTO_ABYTES 16
// NOTE(review): presumably tells the benchmarking framework that input and
// output buffers must not overlap; it is not referenced by the code here.
#define CRYPTO_NOOVERLAP 1
#include <string.h>
#include "core.h"
// Absorbs `len` bytes from `in` into state word x0, RATE bytes at a time,
// running P with PB_ROUNDS after every full block. The remaining
// 0..RATE-1 bytes are padded with 0x80 followed by zeros and absorbed
// without a trailing permutation.
//
// mode == ASCON_AD : only absorbs; `out` is never touched (may be null).
// mode == ASCON_ENC: `in` is plaintext, ciphertext is written to `out`.
// mode == ASCON_DEC: `in` is ciphertext, plaintext is written to `out`, and
//                    the rate word is replaced by the ciphertext.
//
// All buffer accesses go through memcpy because `in` and `out` are plain
// byte pointers without any alignment guarantee.
void process_data(state* s, unsigned char* out, const unsigned char* in,
                  unsigned long long len, u8 mode) {
  u32_2 t0;
  u64 tmp0;
  // full blocks
  while (len >= RATE) {
    memcpy(&tmp0, in, sizeof tmp0);
    t0 = to_bit_interleaving_big(tmp0);
    s->x0.e ^= t0.e;
    s->x0.o ^= t0.o;
    if (mode != ASCON_AD) {
      tmp0 = from_bit_interleaving_big(s->x0);
      memcpy(out, &tmp0, sizeof tmp0);
      // advanced only here so a null `out` is never used in arithmetic
      out += RATE;
    }
    if (mode == ASCON_DEC) {
      s->x0 = t0;
    }
    P(s, PB_ROUNDS);
    in += RATE;
    len -= RATE;
  }
  // final block: len < RATE input bytes, then 0x80, then zeros
  u8 bytes[8];
  memcpy(bytes, in, len);
  memset(bytes + len, 0, 8 - len);
  bytes[len] ^= 0x80;
  memcpy(&tmp0, bytes, sizeof tmp0);
  t0 = to_bit_interleaving_big(tmp0);
  s->x0.e ^= t0.e;
  s->x0.o ^= t0.o;
  if (mode != ASCON_AD) {
    // serialize the rate word; only its first len bytes are output
    tmp0 = from_bit_interleaving_big(s->x0);
    memcpy(bytes, &tmp0, sizeof tmp0);
    memcpy(out, bytes, len);
  }
  if (mode == ASCON_DEC) {
    // overwrite the first len rate bytes with the ciphertext and keep the
    // remaining (padded) state bytes
    memcpy(bytes, in, len);
    memcpy(&tmp0, bytes, sizeof tmp0);
    s->x0 = to_bit_interleaving_big(tmp0);
  }
}
// Runs the shared part of encryption/decryption on state `s`: loads key and
// nonce, initializes the state, absorbs the associated data (if any),
// processes `tlen` bytes from `in` into `out` according to `mode`
// (ASCON_ENC or ASCON_DEC), and finalizes. On return the tag words are left
// in s->x3 and s->x4 for the caller to emit or compare.
//
// `k` and `npub` must each point to 16 readable bytes.
void ascon_core(state* s, unsigned char* out, const unsigned char* in,
                unsigned long long tlen, const unsigned char* ad,
                unsigned long long adlen, const unsigned char* npub,
                const unsigned char* k, u8 mode) {
  u32_2 K0, K1, N0, N1;
  u64 word;
  // load key and nonce
  // memcpy is used instead of *(u64*)ptr because the caller's byte buffers
  // carry no 8-byte alignment guarantee and are not u64 objects.
  memcpy(&word, k, sizeof word);
  K0 = to_bit_interleaving_big(word);
  memcpy(&word, k + 8, sizeof word);
  K1 = to_bit_interleaving_big(word);
  memcpy(&word, npub, sizeof word);
  N0 = to_bit_interleaving_big(word);
  memcpy(&word, npub + 8, sizeof word);
  N1 = to_bit_interleaving_big(word);
  // initialization: state = IV || K || N
  to_bit_interleaving_big_immediate(s->x0, IV);
  s->x1.o = K0.o;
  s->x1.e = K0.e;
  s->x2.e = K1.e;
  s->x2.o = K1.o;
  s->x3.e = N0.e;
  s->x3.o = N0.o;
  s->x4.e = N1.e;
  s->x4.o = N1.o;
  P(s, PA_ROUNDS);
  s->x3.e ^= K0.e;
  s->x3.o ^= K0.o;
  s->x4.e ^= K1.e;
  s->x4.o ^= K1.o;
  // process associated data (skipped entirely when adlen == 0)
  if (adlen) {
    // out is unused in ASCON_AD mode, so a null pointer is passed
    process_data(s, (void*)0, ad, adlen, ASCON_AD);
    P(s, PB_ROUNDS);
  }
  // flip bit 0 of the even half of x4 before the message is processed
  s->x4.e ^= 1;
  // process plaintext/ciphertext
  process_data(s, out, in, tlen, mode);
  // finalization: with the 8-byte rate the key is added to x1 and x2
  s->x1.e ^= K0.e;
  s->x1.o ^= K0.o;
  s->x2.e ^= K1.e;
  s->x2.o ^= K1.o;
  P(s, PA_ROUNDS);
  s->x3.e ^= K0.e;
  s->x3.o ^= K0.o;
  s->x4.e ^= K1.e;
  s->x4.o ^= K1.o;
}
// Shared declarations for the AEAD core: mode selectors, scheme parameters
// and the two functions implemented in the core source file.
#ifndef CORE_H_
#define CORE_H_
#include "api.h"
#include "endian.h"
#include "permutations.h"
// Values for the `mode` argument of process_data() and ascon_core().
// ASCON_AD only absorbs input; ASCON_ENC and ASCON_DEC also write output.
#define ASCON_AD 0
#define ASCON_ENC 1
#define ASCON_DEC 2
// Number of bytes absorbed per block (8 in this variant).
#define RATE (64 / 8)
// Round counts: PA_ROUNDS for initialization/finalization, PB_ROUNDS between
// data blocks.
#define PA_ROUNDS 12
#define PB_ROUNDS 6
// Initial value of state word x0: key size in bits, rate in bits, PA_ROUNDS
// and PB_ROUNDS packed into the four low bytes of a u64.
// NOTE(review): callers pass this through to_bit_interleaving_big_immediate(),
// which applies U64BIG; the resulting word therefore depends on host byte
// order — confirm before building for a big-endian target.
#define IV \
((u64)(8 * (CRYPTO_KEYBYTES)) << 0 | (u64)(8 * (RATE)) << 8 | \
(u64)(PA_ROUNDS) << 16 | (u64)(PB_ROUNDS) << 24)
// Absorbs `len` bytes of `in` into the state; writes `len` bytes to `out`
// unless mode is ASCON_AD.
void process_data(state* s, unsigned char* out, const unsigned char* in,
unsigned long long len, u8 mode);
// Full initialization / associated data / message / finalization sequence;
// leaves the tag in s->x3 and s->x4.
void ascon_core(state* s, unsigned char* out, const unsigned char* in,
unsigned long long tlen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k, u8 mode);
#endif // CORE_H_
#include "core.h"
int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen,
unsigned char* nsec, const unsigned char* c,
unsigned long long clen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k) {
if (clen < CRYPTO_ABYTES) {
*mlen = 0;
return -1;
}
state s;
u32_2 t0, t1;
(void)nsec;
// set plaintext size
*mlen = clen - CRYPTO_ABYTES;
ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DEC);
// verify tag (should be constant time, check compiler output)
t0 = to_bit_interleaving_big((*(u64*)(c + *mlen)));
t1 = to_bit_interleaving_big((*(u64*)(c + *mlen + 8)));
if (((s.x3.e ^ t0.e) | (s.x3.o ^ t0.o) | (s.x4.e ^ t1.e) | (s.x4.o ^ t1.o)) !=
0) {
*mlen = 0;
return -1;
}
return 0;
}
#include "core.h"
// Writes the eight bytes of `word` to `dst` exactly as they sit in memory.
// Done byte-wise because `dst` is at an arbitrary offset inside the caller's
// ciphertext buffer, so it may not be 8-byte aligned.
static void encrypt_store_u64(unsigned char* dst, u64 word) {
  const unsigned char* src = (const unsigned char*)&word;
  unsigned int i;
  for (i = 0; i < sizeof word; i++) {
    dst[i] = src[i];
  }
}

// Encrypts `mlen` bytes of `m` into `c` and appends the CRYPTO_ABYTES tag,
// so `c` must have room for mlen + CRYPTO_ABYTES bytes. *clen receives that
// total. Always returns 0. `nsec` is unused.
int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen,
                        const unsigned char* m, unsigned long long mlen,
                        const unsigned char* ad, unsigned long long adlen,
                        const unsigned char* nsec, const unsigned char* npub,
                        const unsigned char* k) {
  state s;
  (void)nsec;
  // set ciphertext size
  *clen = mlen + CRYPTO_ABYTES;
  ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENC);
  // set tag: x3 and x4 hold the two tag words after ascon_core
  encrypt_store_u64(c + mlen, from_bit_interleaving_big(s.x3));
  encrypt_store_u64(c + mlen + 8, from_bit_interleaving_big(s.x4));
  return 0;
}
// Byte-order helpers: UxxBIG(x) converts between host order and big-endian
// order (the same macro works in both directions).
// The little-endian versions expand their argument several times, so the
// argument must not have side effects.
#ifndef ENDIAN_H_
#define ENDIAN_H_
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
// macros for big endian machines
// (host order already is big-endian, so these are the identity)
#define U64BIG(x) (x)
#define U32BIG(x) (x)
#define U16BIG(x) (x)
#elif defined(_MSC_VER) || \
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
// macros for little endian machines
// (each macro reverses the byte order of its argument)
#define U64BIG(x) \
((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) | \
(((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) | \
(((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) | \
(((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56))
#define U32BIG(x) \
((((x)&0x000000FF) << 24) | (((x)&0x0000FF00) << 8) | \
(((x)&0x00FF0000) >> 8) | (((x)&0xFF000000) >> 24))
#define U16BIG(x) ((((x)&0x00FF) << 8) | (((x)&0xFF00) >> 8))
#else
// Neither branch matched: refuse to build rather than guess the byte order.
#error "ascon byte order macros not defined in endian.h"
#endif
#endif // ENDIAN_H_
Christoph Dobraunig
Martin Schläffer
Ferdinand Bachmann
#include "endian.h"
#include "permutations.h"
// Round constants, one {C_e, C_o} pair per round index; P() feeds pair i to
// ROUND() for round i.
static const u8 constants[][2] = {
    {0xc, 0xc},  // round 0
    {0x9, 0xc},  // round 1
    {0xc, 0x9},  // round 2
    {0x9, 0x9},  // round 3
    {0x6, 0xc},  // round 4
    {0x3, 0xc},  // round 5
    {0x6, 0x9},  // round 6
    {0x3, 0x9},  // round 7
    {0xc, 0x6},  // round 8
    {0x9, 0x6},  // round 9
    {0xc, 0x3},  // round 10
    {0x9, 0x3},  // round 11
};
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
// Function form of to_bit_interleaving_big_immediate: turns a u64 that was
// loaded from a byte buffer into its {e, o} pair of 32-bit words.
u32_2 to_bit_interleaving_big(u64 in) {
  u32_2 pair;
  to_bit_interleaving_big_immediate(pair, in);
  return pair;
}
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
// Function form of from_bit_interleaving_big_immediate: rebuilds, from an
// {e, o} pair, the u64 that can be written back to a byte buffer.
u64 from_bit_interleaving_big(u32_2 in) {
  u64 word;
  from_bit_interleaving_big_immediate(word, in);
  return word;
}
// Applies the last `rounds` of the 12 permutation rounds to *p in place.
// The state is copied into a local first and written back once at the end.
void P(state *p, u8 rounds) {
  // ROUND() refers to these two locals by name, so they must stay `s`/`t0`.
  state s;
  u32_2 t0;
  u32 round = START_ROUND(rounds);
  s = *p;
  while (round < 12) {
    ROUND(constants[round][0], constants[round][1]);
    round++;
  }
  *p = s;
}
// Types and helpers shared by the AEAD core and the permutation P
// (bit-interleaved representation).
#ifndef PERMUTATIONS_H_
#define PERMUTATIONS_H_
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned long long u64;
// One 64-bit state word kept as two 32-bit halves named e and o; the
// conversion macros below define how a 64-bit value maps onto them.
typedef struct {
u32 e;
u32 o;
} u32_2;
// The permutation state: five 64-bit words, 40 bytes in total.
typedef struct {
u32_2 x0;
u32_2 x1;
u32_2 x2;
u32_2 x3;
u32_2 x4;
} state;
// Rotate a 32-bit value right by n bits. n must be in 1..31: n == 0 would
// shift by 32, which is undefined.
#define ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
// Index of the first round to execute when x of the 12 rounds are wanted.
#define START_ROUND(x) (12 - (x))
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
u32_2 to_bit_interleaving_big(u64 in);
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
u64 from_bit_interleaving_big(u32_2 in);
// Converts U64BIG(in) to the {e, o} form: each 32-bit half is permuted by
// four swap steps, then the low 16 bits of both halves are combined into
// out.e and the high 16 bits into out.o.
// `out` must be a u32_2 lvalue; the macro declares locals named big_in, hi,
// lo, r0 and r1, so the arguments must not use those names.
#define to_bit_interleaving_big_immediate(out, in) \
do { \
u64 big_in = U64BIG(in); \
u32 hi = (big_in) >> 32; \
u32 lo = (u32)(big_in); \
u32 r0, r1; \
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); \
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); \
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); \
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); \
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); \
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); \
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); \
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); \
out.e = (lo & 0x0000FFFF) | (hi << 16); \
out.o = (lo >> 16) | (hi & 0xFFFF0000); \
} while (0)
// Inverse of to_bit_interleaving_big_immediate: the same swap steps applied
// in the opposite order, followed by U64BIG. `out` is expanded several times
// and must be a plain u64 lvalue.
#define from_bit_interleaving_big_immediate(out, in) \
do { \
u32 lo = (in.e & 0x0000FFFF) | (in.o << 16); \
u32 hi = (in.e >> 16) | (in.o & 0xFFFF0000); \
u32 r0, r1; \
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); \
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); \
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); \
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); \
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); \
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); \
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); \
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); \
out = (u64)hi << 32 | lo; \
out = U64BIG(out); \
} while (0)
// One permutation round on the {e, o} representation. The macro operates on
// a local `state s` and a scratch `u32_2 t0` that the expanding function
// must declare under exactly those names. C_e and C_o are the round
// constants for the e and o halves of x2.
/* clang-format off */
#define ROUND(C_e, C_o) \
do { \
/* round constant */ \
s.x2.e ^= C_e; s.x2.o ^= C_o; \
/* s-box layer */ \
s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \
s.x4.e ^= s.x3.e; s.x4.o ^= s.x3.o; \
s.x2.e ^= s.x1.e; s.x2.o ^= s.x1.o; \
t0.e = s.x0.e & (~s.x4.e); t0.o = s.x0.o & (~s.x4.o); \
s.x0.e ^= s.x2.e & (~s.x1.e); s.x0.o ^= s.x2.o & (~s.x1.o); \
s.x2.e ^= s.x4.e & (~s.x3.e); s.x2.o ^= s.x4.o & (~s.x3.o); \
s.x4.e ^= s.x1.e & (~s.x0.e); s.x4.o ^= s.x1.o & (~s.x0.o); \
s.x1.e ^= s.x3.e & (~s.x2.e); s.x1.o ^= s.x3.o & (~s.x2.o); \
s.x3.e ^= t0.e; s.x3.o ^= t0.o; \
s.x1.e ^= s.x0.e; s.x1.o ^= s.x0.o; \
s.x3.e ^= s.x2.e; s.x3.o ^= s.x2.o; \
s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \
/* linear layer */ \
t0.e = s.x0.e ^ ROTR32(s.x0.o, 4); \
t0.o = s.x0.o ^ ROTR32(s.x0.e, 5); \
s.x0.e ^= ROTR32(t0.o, 9); \
s.x0.o ^= ROTR32(t0.e, 10); \
t0.e = s.x1.e ^ ROTR32(s.x1.e, 11); \
t0.o = s.x1.o ^ ROTR32(s.x1.o, 11); \
s.x1.e ^= ROTR32(t0.o, 19); \
s.x1.o ^= ROTR32(t0.e, 20); \
t0.e = s.x2.e ^ ROTR32(s.x2.o, 2); \
t0.o = s.x2.o ^ ROTR32(s.x2.e, 3); \
s.x2.e ^= t0.o; \
s.x2.o ^= ROTR32(t0.e, 1); \
t0.e = s.x3.e ^ ROTR32(s.x3.o, 3); \
t0.o = s.x3.o ^ ROTR32(s.x3.e, 4); \
s.x3.e ^= ROTR32(t0.e, 5); \
s.x3.o ^= ROTR32(t0.o, 5); \
t0.e = s.x4.e ^ ROTR32(s.x4.e, 17); \
t0.o = s.x4.o ^ ROTR32(s.x4.o, 17); \
s.x4.e ^= ROTR32(t0.o, 3); \
s.x4.o ^= ROTR32(t0.e, 4); \
s.x2.e = ~s.x2.e; s.x2.o = ~s.x2.o; \
} while(0)
/* clang-format on */
// Applies the last `rounds` of the 12 permutation rounds to *p in place.
void P(state *p, u8 rounds);
#endif // PERMUTATIONS_H_
// Sizes, in bytes, of the buffers exchanged through the crypto_aead_* API.
// Key length; also enters the IV as a bit count.
#define CRYPTO_KEYBYTES 16
// No secret message number is used (the nsec argument is ignored).
#define CRYPTO_NSECBYTES 0
// Public nonce length.
#define CRYPTO_NPUBBYTES 16
// Tag length appended to the ciphertext.
#define CRYPTO_ABYTES 16
// NOTE(review): presumably tells the benchmarking framework that input and
// output buffers must not overlap; it is not referenced by the code here.
#define CRYPTO_NOOVERLAP 1
#include <string.h>
#include "core.h"
// Absorbs `len` bytes from `in` into state word x0, RATE bytes at a time,
// running P with PB_ROUNDS after every full block. The remaining
// 0..RATE-1 bytes are padded with 0x80 followed by zeros and absorbed
// without a trailing permutation.
//
// mode == ASCON_AD : only absorbs; `out` is never touched (may be null).
// mode == ASCON_ENC: `in` is plaintext, ciphertext is written to `out`.
// mode == ASCON_DEC: `in` is ciphertext, plaintext is written to `out`, and
//                    the rate word is replaced by the ciphertext.
//
// All buffer accesses go through memcpy because `in` and `out` are plain
// byte pointers without any alignment guarantee.
void process_data(state* s, unsigned char* out, const unsigned char* in,
                  unsigned long long len, u8 mode) {
  u32_2 t0;
  u64 tmp0;
  // full blocks
  while (len >= RATE) {
    memcpy(&tmp0, in, sizeof tmp0);
    t0 = to_big(tmp0);
    s->x0.h ^= t0.h;
    s->x0.l ^= t0.l;
    if (mode != ASCON_AD) {
      tmp0 = from_big(s->x0);
      memcpy(out, &tmp0, sizeof tmp0);
      // advanced only here so a null `out` is never used in arithmetic
      out += RATE;
    }
    if (mode == ASCON_DEC) {
      s->x0 = t0;
    }
    P(s, START_ROUND(PB_ROUNDS));
    in += RATE;
    len -= RATE;
  }
  // final block: len < RATE input bytes, then 0x80, then zeros
  u8 bytes[8];
  memcpy(bytes, in, len);
  memset(bytes + len, 0, 8 - len);
  bytes[len] ^= 0x80;
  memcpy(&tmp0, bytes, sizeof tmp0);
  t0 = to_big(tmp0);
  s->x0.h ^= t0.h;
  s->x0.l ^= t0.l;
  if (mode != ASCON_AD) {
    // serialize the rate word; only its first len bytes are output
    tmp0 = from_big(s->x0);
    memcpy(bytes, &tmp0, sizeof tmp0);
    memcpy(out, bytes, len);
  }
  if (mode == ASCON_DEC) {
    // overwrite the first len rate bytes with the ciphertext and keep the
    // remaining (padded) state bytes
    memcpy(bytes, in, len);
    memcpy(&tmp0, bytes, sizeof tmp0);
    s->x0 = to_big(tmp0);
  }
}
// Runs the shared part of encryption/decryption on state `s`: loads key and
// nonce, initializes the state, absorbs the associated data (if any),
// processes `tlen` bytes from `in` into `out` according to `mode`
// (ASCON_ENC or ASCON_DEC), and finalizes. On return the tag words are left
// in s->x3 and s->x4 for the caller to emit or compare.
//
// `k` and `npub` must each point to 16 readable bytes.
void ascon_core(state* s, unsigned char* out, const unsigned char* in,
                unsigned long long tlen, const unsigned char* ad,
                unsigned long long adlen, const unsigned char* npub,
                const unsigned char* k, u8 mode) {
  u32_2 K0, K1, N0, N1;
  u64 word;
  // load key and nonce
  // memcpy is used instead of *(u64*)ptr because the caller's byte buffers
  // carry no 8-byte alignment guarantee and are not u64 objects.
  memcpy(&word, k, sizeof word);
  K0 = to_big(word);
  memcpy(&word, k + 8, sizeof word);
  K1 = to_big(word);
  memcpy(&word, npub, sizeof word);
  N0 = to_big(word);
  memcpy(&word, npub + 8, sizeof word);
  N1 = to_big(word);
  // initialization: state = IV || K || N
  to_big_immediate(s->x0, IV);
  s->x1.h = K0.h;
  s->x1.l = K0.l;
  s->x2.h = K1.h;
  s->x2.l = K1.l;
  s->x3.h = N0.h;
  s->x3.l = N0.l;
  s->x4.h = N1.h;
  s->x4.l = N1.l;
  P(s, START_ROUND(PA_ROUNDS));
  s->x3.h ^= K0.h;
  s->x3.l ^= K0.l;
  s->x4.h ^= K1.h;
  s->x4.l ^= K1.l;
  // process associated data (skipped entirely when adlen == 0)
  if (adlen) {
    // out is unused in ASCON_AD mode, so a null pointer is passed
    process_data(s, (void*)0, ad, adlen, ASCON_AD);
    P(s, START_ROUND(PB_ROUNDS));
  }
  // flip the lowest state bit before the message is processed
  s->x4.l ^= 1;
  // process plaintext/ciphertext
  process_data(s, out, in, tlen, mode);
  // finalization: with the 8-byte rate the key is added to x1 and x2
  s->x1.h ^= K0.h;
  s->x1.l ^= K0.l;
  s->x2.h ^= K1.h;
  s->x2.l ^= K1.l;
  P(s, START_ROUND(PA_ROUNDS));
  s->x3.h ^= K0.h;
  s->x3.l ^= K0.l;
  s->x4.h ^= K1.h;
  s->x4.l ^= K1.l;
}
// Shared declarations for the AEAD core: mode selectors, scheme parameters
// and the two functions implemented in the core source file.
#ifndef CORE_H_
#define CORE_H_
#include "api.h"
#include "endian.h"
#include "permutations.h"
// Values for the `mode` argument of process_data() and ascon_core().
// ASCON_AD only absorbs input; ASCON_ENC and ASCON_DEC also write output.
#define ASCON_AD 0
#define ASCON_ENC 1
#define ASCON_DEC 2
// Number of bytes absorbed per block (8 in this variant).
#define RATE (64 / 8)
// Round counts: PA_ROUNDS for initialization/finalization, PB_ROUNDS between
// data blocks.
#define PA_ROUNDS 12
#define PB_ROUNDS 6
// Initial value of state word x0: key size in bits, rate in bits, PA_ROUNDS
// and PB_ROUNDS packed into the four low bytes of a u64.
// NOTE(review): callers pass this through to_big_immediate(), which applies
// U64BIG; the resulting word therefore depends on host byte order — confirm
// before building for a big-endian target.
#define IV \
((u64)(8 * (CRYPTO_KEYBYTES)) << 0 | (u64)(8 * (RATE)) << 8 | \
(u64)(PA_ROUNDS) << 16 | (u64)(PB_ROUNDS) << 24)
// Absorbs `len` bytes of `in` into the state; writes `len` bytes to `out`
// unless mode is ASCON_AD.
void process_data(state* s, unsigned char* out, const unsigned char* in,
unsigned long long len, u8 mode);
// Full initialization / associated data / message / finalization sequence;
// leaves the tag in s->x3 and s->x4.
void ascon_core(state* s, unsigned char* out, const unsigned char* in,
unsigned long long tlen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k, u8 mode);
#endif // CORE_H_
#include "core.h"
int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen,
unsigned char* nsec, const unsigned char* c,
unsigned long long clen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k) {
if (clen < CRYPTO_ABYTES) {
*mlen = 0;
return -1;
}
state s;
u32_2 t0, t1;
(void)nsec;
// set plaintext size
*mlen = clen - CRYPTO_ABYTES;
ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DEC);
// verify tag (should be constant time, check compiler output)
t0 = to_big((*(u64*)(c + *mlen)));
t1 = to_big((*(u64*)(c + *mlen + 8)));
if (((s.x3.h ^ t0.h) | (s.x3.l ^ t0.l) | (s.x4.h ^ t1.h) | (s.x4.l ^ t1.l)) !=
0) {
*mlen = 0;
return -1;
}
return 0;
}
#include "core.h"
// Writes the eight bytes of `word` to `dst` exactly as they sit in memory.
// Done byte-wise because `dst` is at an arbitrary offset inside the caller's
// ciphertext buffer, so it may not be 8-byte aligned.
static void encrypt_store_u64(unsigned char* dst, u64 word) {
  const unsigned char* src = (const unsigned char*)&word;
  unsigned int i;
  for (i = 0; i < sizeof word; i++) {
    dst[i] = src[i];
  }
}

// Encrypts `mlen` bytes of `m` into `c` and appends the CRYPTO_ABYTES tag,
// so `c` must have room for mlen + CRYPTO_ABYTES bytes. *clen receives that
// total. Always returns 0. `nsec` is unused.
int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen,
                        const unsigned char* m, unsigned long long mlen,
                        const unsigned char* ad, unsigned long long adlen,
                        const unsigned char* nsec, const unsigned char* npub,
                        const unsigned char* k) {
  state s;
  (void)nsec;
  // set ciphertext size
  *clen = mlen + CRYPTO_ABYTES;
  ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENC);
  // set tag: x3 and x4 hold the two tag words after ascon_core
  encrypt_store_u64(c + mlen, from_big(s.x3));
  encrypt_store_u64(c + mlen + 8, from_big(s.x4));
  return 0;
}
// Byte-order helpers: UxxBIG(x) converts between host order and big-endian
// order (the same macro works in both directions).
// The little-endian versions expand their argument several times, so the
// argument must not have side effects.
#ifndef ENDIAN_H_
#define ENDIAN_H_
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
// macros for big endian machines
// (host order already is big-endian, so these are the identity)
#define U64BIG(x) (x)
#define U32BIG(x) (x)
#define U16BIG(x) (x)
#elif defined(_MSC_VER) || \
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
// macros for little endian machines
// (each macro reverses the byte order of its argument)
#define U64BIG(x) \
((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) | \
(((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) | \
(((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) | \
(((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56))
#define U32BIG(x) \
((((x)&0x000000FF) << 24) | (((x)&0x0000FF00) << 8) | \
(((x)&0x00FF0000) >> 8) | (((x)&0xFF000000) >> 24))
#define U16BIG(x) ((((x)&0x00FF) << 8) | (((x)&0xFF00) >> 8))
#else
// Neither branch matched: refuse to build rather than guess the byte order.
#error "ascon byte order macros not defined in endian.h"
#endif
#endif // ENDIAN_H_
Christoph Dobraunig
Martin Schläffer
Ferdinand Bachmann
#include "endian.h"
#include "permutations.h"
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
/* Splits a 64-bit word that was loaded in host byte order into the high and
 * low 32-bit halves of its big-endian interpretation. */
u32_2 to_big(u64 in) {
  const u64 be = U64BIG(in);
  u32_2 halves;
  halves.h = (u32)(be >> 32);
  halves.l = (u32)be;
  return halves;
}
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
/* Inverse of to_big: joins the two halves and converts the result back to
 * the representation that stores as big-endian bytes on this host. */
u64 from_big(u32_2 in) {
  u64 word = ((u64)in.h << 32) | in.l;
  return U64BIG(word);
}
/*
void P(state *p, u8 start_round) {
// implemented in asm
}
*/
#ifndef PERMUTATIONS_H_
#define PERMUTATIONS_H_
// Shorthand integer types used throughout the implementation.
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned long long u64;
// One 64-bit state word held as two 32-bit halves: h = upper 32 bits,
// l = lower 32 bits (see to_big_immediate below).
typedef struct {
u32 h;
u32 l;
} u32_2;
// The permutation state: five 64-bit words x0..x4 (40 bytes).
typedef struct {
u32_2 x0;
u32_2 x1;
u32_2 x2;
u32_2 x3;
u32_2 x4;
} state;
// Index of the first round to execute when x rounds out of 12 are wanted.
#define START_ROUND(x) (12 - (x))
// Function forms of the two conversion macros below.
u32_2 to_big(u64 in);
u64 from_big(u32_2 in);
// Stores into `out` (a u32_2 lvalue) the halves of U64BIG(in).
// Declares locals big_in/hi/lo, so the arguments must not use those names.
#define to_big_immediate(out, in) \
do { \
u64 big_in = U64BIG(in); \
u32 hi = (big_in) >> 32; \
u32 lo = (u32)(big_in); \
out.h = hi; \
out.l = lo; \
} while (0)
// Inverse of to_big_immediate: `out` (a u64 lvalue) receives the joined
// halves of `in`, passed through U64BIG. Declares locals hi/lo.
#define from_big_immediate(out, in) \
do { \
u32 hi = in.h; \
u32 lo = in.l; \
out = (u64)hi << 32 | lo; \
out = U64BIG(out); \
} while (0)
// Applies the permutation to *p in place.
// NOTE(review): the parameter is named `rounds`, but the accompanying
// assembly implementation appears to use it as the index of the first round
// and run up to round 11 — presumably callers pass START_ROUND(n); confirm.
void P(state *p, u8 rounds);
#endif // PERMUTATIONS_H_
#include <xtensa/coreasm.h>
/*
 * Register allocation for P. On entry a2 is used as the pointer to the state
 * and a3 as the index of the first round to run (rnd).
 * tmp0 reuses a2, which is why the state pointer is spilled to the stack
 * before the round loop. t0h/t0l and tmp1/tmp2 share a14/a15: the former
 * names are used in the linear layer, the latter in the s-box layer.
 */
#define tmp0 a2
#define rnd a3
#define x0h a4
#define x0l a5
#define x1h a6
#define x1l a7
#define x2h a8
#define x2l a9
#define x3h a10
#define x3l a11
#define x4h a12
#define x4l a13
#define t0h a14
#define t0l a15
#define tmp1 a14
#define tmp2 a15
.section .text
.align 4
.global P
.type P,@function
/*
 * P: runs rounds rnd..11 of the permutation on the 40-byte state at a2,
 * with each 64-bit word stored as {high u32, low u32}.
 */
P:
abi_entry 4
/* load the five state words (high half first) */
l32i x0h, a2, 0
l32i x0l, a2, 4
l32i x1h, a2, 8
l32i x1l, a2, 12
l32i x2h, a2, 16
l32i x2l, a2, 20
l32i x3h, a2, 24
l32i x3l, a2, 28
l32i x4h, a2, 32
l32i x4l, a2, 36
/* save the state pointer; a2 is reused as tmp0 below */
s32i a2, a1, 0
.Lround_start:
/* round constant */
/* constant = ((15 - rnd) << 4) | rnd, xored into the low half of x2 */
movi tmp0, 15
sub tmp0, tmp0, rnd
slli tmp0, tmp0, 4
or tmp0, tmp0, rnd
xor x2l, x2l, tmp0
/* s-box layer */
/* tmp0 = all ones, so "xor r, tmp0, x" computes ~x */
movi tmp0, -1
/* high */
xor x0h, x0h, x4h
xor x4h, x4h, x3h
xor x2h, x2h, x1h
/* tmp2 = ~x4 & x0, kept until x3 is updated */
xor tmp1, tmp0, x4h
and tmp2, tmp1, x0h
xor tmp1, tmp0, x1h
and tmp1, tmp1, x2h
xor x0h, x0h, tmp1
xor tmp1, tmp0, x3h
and tmp1, tmp1, x4h
xor x2h, x2h, tmp1
xor tmp1, tmp0, x0h
and tmp1, tmp1, x1h
xor x4h, x4h, tmp1
xor tmp1, tmp0, x2h
and tmp1, tmp1, x3h
xor x1h, x1h, tmp1
xor x3h, x3h, tmp2
xor x1h, x1h, x0h
xor x3h, x3h, x2h
xor x0h, x0h, x4h
/* x2 = ~x2 */
xor x2h, x2h, tmp0
/* low */
/* same sequence as above, applied to the low halves */
xor x0l, x0l, x4l
xor x4l, x4l, x3l
xor x2l, x2l, x1l
xor tmp1, tmp0, x4l
and tmp2, tmp1, x0l
xor tmp1, tmp0, x1l
and tmp1, tmp1, x2l
xor x0l, x0l, tmp1
xor tmp1, tmp0, x3l
and tmp1, tmp1, x4l
xor x2l, x2l, tmp1
xor tmp1, tmp0, x0l
and tmp1, tmp1, x1l
xor x4l, x4l, tmp1
xor tmp1, tmp0, x2l
and tmp1, tmp1, x3l
xor x1l, x1l, tmp1
xor x3l, x3l, tmp2
xor x1l, x1l, x0l
xor x3l, x3l, x2l
xor x0l, x0l, x4l
xor x2l, x2l, tmp0
/* linear layer */
/*
 * Each word becomes x ^ ror64(x, r1) ^ ror64(x, r2). ssai sets the shift
 * amount and src takes 32 bits out of a register pair shifted right by it,
 * so one 64-bit rotation costs two src instructions. Rotations by more than
 * 32 swap the pair operands and rotate by (r - 32).
 */
/* x0: rotations 19 and 28 */
ssai 19
src t0l, x0h, x0l
src t0h, x0l, x0h
xor t0l, t0l, x0l
xor t0h, t0h, x0h
ssai 28
src tmp0, x0h, x0l
src x0h, x0l, x0h
xor x0l, tmp0, t0l
xor x0h, x0h, t0h
/* x1: rotations 61 and 39 */
ssai 29 /* inverted 61 */
src t0l, x1l, x1h
src t0h, x1h, x1l
xor t0l, t0l, x1l
xor t0h, t0h, x1h
ssai 7 /* inverted 39 */
src tmp0, x1l, x1h
src x1h, x1h, x1l
xor x1l, tmp0, t0l
xor x1h, x1h, t0h
/* x2: rotations 1 and 6 */
ssai 1
src t0l, x2h, x2l
src t0h, x2l, x2h
xor t0l, t0l, x2l
xor t0h, t0h, x2h
ssai 6
src tmp0, x2h, x2l
src x2h, x2l, x2h
xor x2l, tmp0, t0l
xor x2h, x2h, t0h
/* x3: rotations 10 and 17 */
ssai 10
src t0l, x3h, x3l
src t0h, x3l, x3h
xor t0l, t0l, x3l
xor t0h, t0h, x3h
ssai 17
src tmp0, x3h, x3l
src x3h, x3l, x3h
xor x3l, tmp0, t0l
xor x3h, x3h, t0h
/* x4: rotations 7 and 41 */
ssai 7
src t0l, x4h, x4l
src t0h, x4l, x4h
xor t0l, t0l, x4l
xor t0h, t0h, x4h
ssai 9 /* inverted 41 */
src tmp0, x4l, x4h
src x4h, x4h, x4l
xor x4l, tmp0, t0l
xor x4h, x4h, t0h
/* loop */
/* the body always runs at least once; repeat while rnd < 12 */
addi rnd, rnd, 1
bltui rnd, 12, .Lround_start
.Lend:
/* reload the state pointer and write the state back */
l32i a2, a1, 0
s32i x0h, a2, 0
s32i x0l, a2, 4
s32i x1h, a2, 8
s32i x1l, a2, 12
s32i x2h, a2, 16
s32i x2l, a2, 20
s32i x3h, a2, 24
s32i x3l, a2, 28
s32i x4h, a2, 32
s32i x4l, a2, 36
abi_return
// Sizes, in bytes, of the inputs and outputs of crypto_aead_encrypt/decrypt.
// Key length: 20 bytes = 160 bits (also encoded into IV in core.h).
#define CRYPTO_KEYBYTES 20
// Secret message number length; nsec is ignored by this implementation.
#define CRYPTO_NSECBYTES 0
// Public nonce length; ascon_core reads exactly 16 bytes from npub.
#define CRYPTO_NPUBBYTES 16
// Authentication tag length appended to the ciphertext.
#define CRYPTO_ABYTES 16
// NOTE(review): presumably tells the calling framework that input and output
// buffers must not overlap — confirm against the API convention in use.
#define CRYPTO_NOOVERLAP 1
#include <string.h>
#include "core.h"
/*
 * Absorbs `len` bytes from `in` into the rate word s->x0, RATE bytes at a
 * time, and produces output depending on `mode`:
 *   ASCON_AD  - absorb only; `out` is never touched (it may be null).
 *   ASCON_ENC - out = in XOR state.
 *   ASCON_DEC - out = in XOR state, then the rate word is replaced by `in`.
 * P is applied after every full block. The last, padded (0x80 then zeros)
 * partial block is absorbed without a trailing permutation; the caller runs
 * the next permutation.
 *
 * All 64-bit loads and stores go through memcpy because `in` and `out` have
 * no alignment guarantee and must not be accessed through a u64* cast.
 */
void process_data(state* s, unsigned char* out, const unsigned char* in,
                  unsigned long long len, u8 mode) {
  u32_2 t0;
  u64 tmp0;
  u8 bytes[8];
  while (len >= RATE) {
    memcpy(&tmp0, in, sizeof tmp0);
    t0 = to_bit_interleaving_big(tmp0);
    s->x0.e ^= t0.e;
    s->x0.o ^= t0.o;
    if (mode != ASCON_AD) {
      tmp0 = from_bit_interleaving_big(s->x0);
      memcpy(out, &tmp0, sizeof tmp0);
      // only advance `out` when it is a real buffer (it is null in AD mode)
      out += RATE;
    }
    if (mode == ASCON_DEC) {
      s->x0 = t0;
    }
    P(s, START_ROUND(PB_ROUNDS));
    in += RATE;
    len -= RATE;
  }
  // final block: remaining bytes, then 0x80, then zero padding
  memset(bytes, 0, sizeof bytes);
  if (len) {
    memcpy(bytes, in, len);
  }
  bytes[len] ^= 0x80;
  memcpy(&tmp0, bytes, sizeof tmp0);
  t0 = to_bit_interleaving_big(tmp0);
  s->x0.e ^= t0.e;
  s->x0.o ^= t0.o;
  if (mode != ASCON_AD) {
    tmp0 = from_bit_interleaving_big(s->x0);
    memcpy(bytes, &tmp0, sizeof bytes);
    if (len) {
      memcpy(out, bytes, len);
    }
  }
  if (mode == ASCON_DEC) {
    // keep the state bytes past `len`, replace the first `len` with `in`
    if (len) {
      memcpy(bytes, in, len);
    }
    memcpy(&tmp0, bytes, sizeof tmp0);
    s->x0 = to_bit_interleaving_big(tmp0);
  }
}
/*
 * Runs the whole AEAD computation except tag output/verification:
 * initialization from key and nonce, associated-data absorption,
 * plaintext/ciphertext processing (`mode` is ASCON_ENC or ASCON_DEC) and
 * finalization. On return s->x3 and s->x4 hold the tag words.
 *
 * `k` must point to 20 bytes and `npub` to 16 bytes. They are read with
 * memcpy because neither pointer has an alignment guarantee.
 */
void ascon_core(state* s, unsigned char* out, const unsigned char* in,
                unsigned long long tlen, const unsigned char* ad,
                unsigned long long adlen, const unsigned char* npub,
                const unsigned char* k, u8 mode) {
  u32_2 K0, K1, K2, N0, N1;
  u64 w;
  // load key and nonce
  // K0 = key bytes 0..3 (moved to the low 16 bits of each half),
  // K1 = key bytes 4..11, K2 = key bytes 12..19
  memcpy(&w, k, sizeof w);
  K0 = to_bit_interleaving_big(w);
  K0.e >>= 16;
  K0.o >>= 16;
  memcpy(&w, k + 4, sizeof w);
  K1 = to_bit_interleaving_big(w);
  memcpy(&w, k + 12, sizeof w);
  K2 = to_bit_interleaving_big(w);
  memcpy(&w, npub, sizeof w);
  N0 = to_bit_interleaving_big(w);
  memcpy(&w, npub + 8, sizeof w);
  N1 = to_bit_interleaving_big(w);
  // initialization
  to_bit_interleaving_big_immediate(s->x0, IV);
  s->x0.e |= K0.e;
  s->x0.o |= K0.o;
  s->x1.e = K1.e;
  s->x1.o = K1.o;
  s->x2.e = K2.e;
  s->x2.o = K2.o;
  s->x3.e = N0.e;
  s->x3.o = N0.o;
  s->x4.e = N1.e;
  s->x4.o = N1.o;
  P(s, START_ROUND(PA_ROUNDS));
  s->x2.e ^= K0.e;
  s->x2.o ^= K0.o;
  s->x3.e ^= K1.e;
  s->x3.o ^= K1.o;
  s->x4.e ^= K2.e;
  s->x4.o ^= K2.o;
  // process associated data
  if (adlen) {
    process_data(s, (void*)0, ad, adlen, ASCON_AD);
    P(s, START_ROUND(PB_ROUNDS));
  }
  // domain separation between associated data and message
  s->x4.e ^= 1;
  // process plaintext/ciphertext
  process_data(s, out, in, tlen, mode);
  // finalization
  // xor the 160-bit key into x1, x2 and the upper part of x3
  s->x1.e ^= (K0.e << 16) | (K1.e >> 16);
  s->x1.o ^= (K0.o << 16) | (K1.o >> 16);
  s->x2.e ^= (K1.e << 16) | (K2.e >> 16);
  s->x2.o ^= (K1.o << 16) | (K2.o >> 16);
  s->x3.e ^= (K2.e << 16);
  s->x3.o ^= (K2.o << 16);
  P(s, START_ROUND(PA_ROUNDS));
  s->x3.e ^= K1.e;
  s->x3.o ^= K1.o;
  s->x4.e ^= K2.e;
  s->x4.o ^= K2.o;
}
#ifndef CORE_H_
#define CORE_H_
#include "api.h"
#include "endian.h"
#include "permutations.h"
// Values of the `mode` argument of process_data / ascon_core.
#define ASCON_AD 0
#define ASCON_ENC 1
#define ASCON_DEC 2
// Block size in bytes absorbed per permutation call (64 bits).
#define RATE (64 / 8)
// Round counts: PA for initialization/finalization, PB between data blocks.
#define PA_ROUNDS 12
#define PB_ROUNDS 6
// Parameter word: key size in bits, rate in bits, PA and PB round counts,
// one per byte starting at the least-significant byte. ascon_core feeds it
// through to_bit_interleaving_big_immediate, which applies U64BIG first.
#define IV \
((u64)(8 * (CRYPTO_KEYBYTES)) << 0 | (u64)(8 * (RATE)) << 8 | \
(u64)(PA_ROUNDS) << 16 | (u64)(PB_ROUNDS) << 24)
// Absorbs `len` bytes of `in` into the state; writes output to `out` unless
// mode is ASCON_AD.
void process_data(state* s, unsigned char* out, const unsigned char* in,
unsigned long long len, u8 mode);
// Full AEAD core: initialization, associated data, message, finalization.
// Leaves the tag words in s->x3 and s->x4.
void ascon_core(state* s, unsigned char* out, const unsigned char* in,
unsigned long long tlen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k, u8 mode);
#endif // CORE_H_
#include "core.h"
int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen,
unsigned char* nsec, const unsigned char* c,
unsigned long long clen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k) {
if (clen < CRYPTO_ABYTES) {
*mlen = 0;
return -1;
}
state s;
u32_2 t0, t1;
(void)nsec;
// set plaintext size
*mlen = clen - CRYPTO_ABYTES;
ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DEC);
// verify tag (should be constant time, check compiler output)
t0 = to_bit_interleaving_big((*(u64*)(c + *mlen)));
t1 = to_bit_interleaving_big((*(u64*)(c + *mlen + 8)));
if (((s.x3.e ^ t0.e) | (s.x3.o ^ t0.o) | (s.x4.e ^ t1.e) | (s.x4.o ^ t1.o)) !=
0) {
*mlen = 0;
return -1;
}
return 0;
}
#include "core.h"
/*
 * Encrypts `mlen` bytes at `m` into `c` and appends the CRYPTO_ABYTES-byte
 * tag, so `c` must have room for mlen + CRYPTO_ABYTES bytes.
 *
 * Stores the total output length in *clen and always returns 0.
 * `nsec` is unused.
 */
int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen,
                        const unsigned char* m, unsigned long long mlen,
                        const unsigned char* ad, unsigned long long adlen,
                        const unsigned char* nsec, const unsigned char* npub,
                        const unsigned char* k) {
  state s;
  u64 tmp0, tmp1;
  const unsigned char* tag0_bytes = (const unsigned char*)&tmp0;
  const unsigned char* tag1_bytes = (const unsigned char*)&tmp1;
  unsigned char* tag;
  unsigned i;
  (void)nsec;
  // set ciphertext size
  *clen = mlen + CRYPTO_ABYTES;
  ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENC);
  // set tag
  tmp0 = from_bit_interleaving_big(s.x3);
  tmp1 = from_bit_interleaving_big(s.x4);
  // c + mlen has no alignment guarantee, so the tag is stored byte-wise
  // rather than through a u64* cast.
  tag = c + mlen;
  for (i = 0; i < sizeof tmp0; ++i) {
    tag[i] = tag0_bytes[i];
    tag[sizeof tmp0 + i] = tag1_bytes[i];
  }
  return 0;
}
#ifndef ENDIAN_H_
#define ENDIAN_H_
// UxxBIG(x) converts an xx-bit value between host byte order and big-endian
// byte order: identity on big-endian hosts, a byte swap on little-endian
// hosts. The swap macros evaluate their argument several times, so the
// argument must be free of side effects.
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
// macros for big endian machines
#define U64BIG(x) (x)
#define U32BIG(x) (x)
#define U16BIG(x) (x)
#elif defined(_MSC_VER) || \
    (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
// macros for little endian machines
#define U64BIG(x)                                                              \
  ((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) | \
   (((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) |  \
   (((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) |  \
   (((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56))
// The masks are unsigned so that a signed int argument is never left-shifted
// into the sign bit.
#define U32BIG(x)                                           \
  ((((x)&0x000000FFU) << 24) | (((x)&0x0000FF00U) << 8) | \
   (((x)&0x00FF0000U) >> 8) | (((x)&0xFF000000U) >> 24))
#define U16BIG(x) ((((x)&0x00FF) << 8) | (((x)&0xFF00) >> 8))
#else
#error "ascon byte order macros not defined in endian.h"
#endif
#endif  // ENDIAN_H_
Christoph Dobraunig
Martin Schläffer
Ferdinand Bachmann
#include "endian.h"
#include "permutations.h"
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
u32_2 to_bit_interleaving_big(u64 in) {
u32_2 out;
to_bit_interleaving_big_immediate(out, in);
return out;
}
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
u64 from_bit_interleaving_big(u32_2 in) {
u64 out;
from_bit_interleaving_big_immediate(out, in);
return out;
}
/*
void P(state *p, u8 start_round) {
// implemented in asm
}
*/
#ifndef PERMUTATIONS_H_
#define PERMUTATIONS_H_
// Shorthand integer types used throughout the implementation.
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned long long u64;
// One 64-bit state word in bit-interleaved form, as produced by
// to_bit_interleaving_big_immediate (the names suggest even/odd bits).
typedef struct {
u32 e;
u32 o;
} u32_2;
// The permutation state: five 64-bit words x0..x4 (40 bytes).
typedef struct {
u32_2 x0;
u32_2 x1;
u32_2 x2;
u32_2 x3;
u32_2 x4;
} state;
// 32-bit rotate right. n must be in 1..31: n == 0 would shift by 32.
#define ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
// Index of the first round to execute when x rounds out of 12 are wanted.
#define START_ROUND(x) (12 - (x))
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
u32_2 to_bit_interleaving_big(u64 in);
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
u64 from_bit_interleaving_big(u32_2 in);
// Stores into `out` (a u32_2 lvalue) the bit-interleaved form of U64BIG(in).
// Declares locals big_in/hi/lo/r0/r1, so the arguments must not use those
// names.
#define to_bit_interleaving_big_immediate(out, in) \
do { \
u64 big_in = U64BIG(in); \
u32 hi = (big_in) >> 32; \
u32 lo = (u32)(big_in); \
u32 r0, r1; \
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); \
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); \
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); \
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); \
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); \
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); \
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); \
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); \
out.e = (lo & 0x0000FFFF) | (hi << 16); \
out.o = (lo >> 16) | (hi & 0xFFFF0000); \
} while (0)
// Inverse of to_bit_interleaving_big_immediate: `out` (a u64 lvalue)
// receives the de-interleaved word, passed through U64BIG.
// Declares locals lo/hi/r0/r1.
#define from_bit_interleaving_big_immediate(out, in) \
do { \
u32 lo = (in.e & 0x0000FFFF) | (in.o << 16); \
u32 hi = (in.e >> 16) | (in.o & 0xFFFF0000); \
u32 r0, r1; \
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); \
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); \
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); \
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); \
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); \
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); \
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); \
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); \
out = (u64)hi << 32 | lo; \
out = U64BIG(out); \
} while (0)
// One permutation round on the bit-interleaved state. The macro is not
// self-contained: it expects a `state s` and a scratch `u32_2 t0` to be
// declared in the scope where it is expanded. C_e / C_o are the round
// constant parts xored into x2.e / x2.o.
/* clang-format off */
#define ROUND(C_e, C_o) \
do { \
/* round constant */ \
s.x2.e ^= C_e; s.x2.o ^= C_o; \
/* s-box layer */ \
s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \
s.x4.e ^= s.x3.e; s.x4.o ^= s.x3.o; \
s.x2.e ^= s.x1.e; s.x2.o ^= s.x1.o; \
t0.e = s.x0.e & (~s.x4.e); t0.o = s.x0.o & (~s.x4.o); \
s.x0.e ^= s.x2.e & (~s.x1.e); s.x0.o ^= s.x2.o & (~s.x1.o); \
s.x2.e ^= s.x4.e & (~s.x3.e); s.x2.o ^= s.x4.o & (~s.x3.o); \
s.x4.e ^= s.x1.e & (~s.x0.e); s.x4.o ^= s.x1.o & (~s.x0.o); \
s.x1.e ^= s.x3.e & (~s.x2.e); s.x1.o ^= s.x3.o & (~s.x2.o); \
s.x3.e ^= t0.e; s.x3.o ^= t0.o; \
s.x1.e ^= s.x0.e; s.x1.o ^= s.x0.o; \
s.x3.e ^= s.x2.e; s.x3.o ^= s.x2.o; \
s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \
/* linear layer */ \
t0.e = s.x0.e ^ ROTR32(s.x0.o, 4); \
t0.o = s.x0.o ^ ROTR32(s.x0.e, 5); \
s.x0.e ^= ROTR32(t0.o, 9); \
s.x0.o ^= ROTR32(t0.e, 10); \
t0.e = s.x1.e ^ ROTR32(s.x1.e, 11); \
t0.o = s.x1.o ^ ROTR32(s.x1.o, 11); \
s.x1.e ^= ROTR32(t0.o, 19); \
s.x1.o ^= ROTR32(t0.e, 20); \
t0.e = s.x2.e ^ ROTR32(s.x2.o, 2); \
t0.o = s.x2.o ^ ROTR32(s.x2.e, 3); \
s.x2.e ^= t0.o; \
s.x2.o ^= ROTR32(t0.e, 1); \
t0.e = s.x3.e ^ ROTR32(s.x3.o, 3); \
t0.o = s.x3.o ^ ROTR32(s.x3.e, 4); \
s.x3.e ^= ROTR32(t0.e, 5); \
s.x3.o ^= ROTR32(t0.o, 5); \
t0.e = s.x4.e ^ ROTR32(s.x4.e, 17); \
t0.o = s.x4.o ^ ROTR32(s.x4.o, 17); \
s.x4.e ^= ROTR32(t0.o, 3); \
s.x4.o ^= ROTR32(t0.e, 4); \
s.x2.e = ~s.x2.e; s.x2.o = ~s.x2.o; \
} while(0)
/* clang-format on */
// Applies the permutation to *p in place.
// NOTE(review): the parameter is named `rounds`, but callers in this variant
// pass START_ROUND(n) and the assembly implementation appears to use the
// value as the index of the first round — confirm before relying on the name.
void P(state *p, u8 rounds);
#endif // PERMUTATIONS_H_
#include <xtensa/coreasm.h>
/*
 * Round constant table: twelve 2-byte entries, one per round index 0..11.
 * P xors byte 0 of an entry into x2e and byte 1 into x2o. The values match
 * the `constants` table of the C implementation of this variant.
 */
.section .data
.align 2
.global ascon_round_constants
.type ascon_round_constants,@object
ascon_round_constants:
.byte 0xc, 0xc
.byte 0x9, 0xc
.byte 0xc, 0x9
.byte 0x9, 0x9
.byte 0x6, 0xc
.byte 0x3, 0xc
.byte 0x6, 0x9
.byte 0x3, 0x9
.byte 0xc, 0x6
.byte 0x9, 0x6
.byte 0xc, 0x3
.byte 0x9, 0x3
/*
 * Register allocation for P. On entry a2 is used as the pointer to the state
 * and a3 as the index of the first round to run. a2 is later reused as the
 * cursor into ascon_round_constants, so the state pointer is spilled first.
 * t0e/t0o and tmp/neg share a14/a15: the former names are used for the round
 * constant and linear layer, the latter in the s-box layer.
 */
#define x0e a4
#define x0o a5
#define x1e a6
#define x1o a7
#define x2e a8
#define x2o a9
#define x3e a10
#define x3o a11
#define x4e a12
#define x4o a13
#define t0e a14
#define t0o a15
#define tmp a14
#define neg a15
.section .text
.align 4
.global P
.type P,@function
/*
 * P: runs rounds a3..11 of the permutation on the 40-byte bit-interleaved
 * state at a2 (each word stored as {e, o}).
 */
P:
abi_entry 4
/* load the five state words */
l32i x0e, a2, 0
l32i x0o, a2, 4
l32i x1e, a2, 8
l32i x1o, a2, 12
l32i x2e, a2, 16
l32i x2o, a2, 20
l32i x3e, a2, 24
l32i x3o, a2, 28
l32i x4e, a2, 32
l32i x4o, a2, 36
/* save the state pointer, then point a2 at the entry for round a3 */
s32i a2, a1, 0
movi a2, ascon_round_constants
addx2 a2, a3, a2
.Lround_start:
/* round constant */
/* a14/a15 are t0e/t0o: the two constant bytes just loaded */
l8ui t0e, a2, 0
xor x2e, x2e, a14
l8ui t0o, a2, 1
xor x2o, x2o, a15
/* s-box layer */
/* neg is reloaded with all ones before each use, so "xor neg, neg, x" = ~x */
/* even */
xor x0e, x0e, x4e
xor x4e, x4e, x3e
xor x2e, x2e, x1e
/* tmp = ~x4 & x0, kept until x3 is updated */
movi neg, -1
xor neg, neg, x4e
and tmp, neg, x0e
movi neg, -1
xor neg, neg, x1e
and neg, neg, x2e
xor x0e, x0e, neg
movi neg, -1
xor neg, neg, x3e
and neg, neg, x4e
xor x2e, x2e, neg
movi neg, -1
xor neg, neg, x0e
and neg, neg, x1e
xor x4e, x4e, neg
movi neg, -1
xor neg, neg, x2e
and neg, neg, x3e
xor x1e, x1e, neg
xor x3e, x3e, tmp
xor x1e, x1e, x0e
xor x3e, x3e, x2e
xor x0e, x0e, x4e
/* odd */
/* same sequence as above, applied to the odd halves */
xor x0o, x0o, x4o
xor x4o, x4o, x3o
xor x2o, x2o, x1o
movi neg, -1
xor neg, neg, x4o
and tmp, neg, x0o
movi neg, -1
xor neg, neg, x1o
and neg, neg, x2o
xor x0o, x0o, neg
movi neg, -1
xor neg, neg, x3o
and neg, neg, x4o
xor x2o, x2o, neg
movi neg, -1
xor neg, neg, x0o
and neg, neg, x1o
xor x4o, x4o, neg
movi neg, -1
xor neg, neg, x2o
and neg, neg, x3o
xor x1o, x1o, neg
xor x3o, x3o, tmp
xor x1o, x1o, x0o
xor x3o, x3o, x2o
xor x0o, x0o, x4o
/* linear layer */
/*
 * ssai sets the shift amount; "src d, r, r" with both sources equal is a
 * 32-bit rotate right of r. The rotation amounts mirror the ROUND macro in
 * permutations.h.
 */
/* x0 */
ssai 4
src t0e, x0o, x0o
xor t0e, t0e, x0e
ssai 5
src t0o, x0e, x0e
xor t0o, t0o, x0o
ssai 9
src t0o, t0o, t0o
xor x0e, x0e, t0o
ssai 10
src t0e, t0e, t0e
xor x0o, x0o, t0e
/* x1 */
ssai 11
src t0e, x1e, x1e
xor t0e, t0e, x1e
src t0o, x1o, x1o
xor t0o, t0o, x1o
ssai 19
src t0o, t0o, t0o
xor x1e, x1e, t0o
ssai 20
src t0e, t0e, t0e
xor x1o, x1o, t0e
/* x2 */
ssai 2
src t0e, x2o, x2o
xor t0e, t0e, x2e
ssai 3
src t0o, x2e, x2e
xor t0o, t0o, x2o
xor x2e, x2e, t0o
ssai 1
src t0e, t0e, t0e
xor x2o, x2o, t0e
/* x3 */
ssai 3
src t0e, x3o, x3o
xor t0e, t0e, x3e
ssai 4
src t0o, x3e, x3e
xor t0o, t0o, x3o
ssai 5
src t0e, t0e, t0e
xor x3e, x3e, t0e
src t0o, t0o, t0o
xor x3o, x3o, t0o
/* x4 */
ssai 17
src t0e, x4e, x4e
xor t0e, t0e, x4e
src t0o, x4o, x4o
xor t0o, t0o, x4o
ssai 3
src t0o, t0o, t0o
xor x4e, x4e, t0o
ssai 4
src t0e, t0e, t0e
xor x4o, x4o, t0e
/* x2 = ~x2 (the s-box complement, applied after the linear layer) */
movi neg, -1
xor x2e, x2e, neg
xor x2o, x2o, neg
/* loop */
/* advance the constant cursor and round index; repeat while a3 < 12 */
addi a2, a2, 2
addi a3, a3, 1
bltui a3, 12, .Lround_start
.Lend:
/* reload the state pointer and write the state back */
l32i a2, a1, 0
s32i x0e, a2, 0
s32i x0o, a2, 4
s32i x1e, a2, 8
s32i x1o, a2, 12
s32i x2e, a2, 16
s32i x2o, a2, 20
s32i x3e, a2, 24
s32i x3o, a2, 28
s32i x4e, a2, 32
s32i x4o, a2, 36
abi_return
// Sizes, in bytes, of the inputs and outputs of crypto_aead_encrypt/decrypt.
// Key length: 20 bytes = 160 bits (also encoded into IV in core.h).
#define CRYPTO_KEYBYTES 20
// Secret message number length; nsec is ignored by this implementation.
#define CRYPTO_NSECBYTES 0
// Public nonce length; ascon_core reads exactly 16 bytes from npub.
#define CRYPTO_NPUBBYTES 16
// Authentication tag length appended to the ciphertext.
#define CRYPTO_ABYTES 16
// NOTE(review): presumably tells the calling framework that input and output
// buffers must not overlap — confirm against the API convention in use.
#define CRYPTO_NOOVERLAP 1
#include <string.h>
#include "core.h"
/*
 * Absorbs `len` bytes from `in` into the rate word s->x0, RATE bytes at a
 * time, and produces output depending on `mode`:
 *   ASCON_AD  - absorb only; `out` is never touched (it may be null).
 *   ASCON_ENC - out = in XOR state.
 *   ASCON_DEC - out = in XOR state, then the rate word is replaced by `in`.
 * P is applied after every full block. The last, padded (0x80 then zeros)
 * partial block is absorbed without a trailing permutation; the caller runs
 * the next permutation.
 *
 * All 64-bit loads and stores go through memcpy because `in` and `out` have
 * no alignment guarantee and must not be accessed through a u64* cast.
 */
void process_data(state* s, unsigned char* out, const unsigned char* in,
                  unsigned long long len, u8 mode) {
  u32_2 t0;
  u64 tmp0;
  u8 bytes[8];
  while (len >= RATE) {
    memcpy(&tmp0, in, sizeof tmp0);
    t0 = to_bit_interleaving_big(tmp0);
    s->x0.e ^= t0.e;
    s->x0.o ^= t0.o;
    if (mode != ASCON_AD) {
      tmp0 = from_bit_interleaving_big(s->x0);
      memcpy(out, &tmp0, sizeof tmp0);
      // only advance `out` when it is a real buffer (it is null in AD mode)
      out += RATE;
    }
    if (mode == ASCON_DEC) {
      s->x0 = t0;
    }
    P(s, PB_ROUNDS);
    in += RATE;
    len -= RATE;
  }
  // final block: remaining bytes, then 0x80, then zero padding
  memset(bytes, 0, sizeof bytes);
  if (len) {
    memcpy(bytes, in, len);
  }
  bytes[len] ^= 0x80;
  memcpy(&tmp0, bytes, sizeof tmp0);
  t0 = to_bit_interleaving_big(tmp0);
  s->x0.e ^= t0.e;
  s->x0.o ^= t0.o;
  if (mode != ASCON_AD) {
    tmp0 = from_bit_interleaving_big(s->x0);
    memcpy(bytes, &tmp0, sizeof bytes);
    if (len) {
      memcpy(out, bytes, len);
    }
  }
  if (mode == ASCON_DEC) {
    // keep the state bytes past `len`, replace the first `len` with `in`
    if (len) {
      memcpy(bytes, in, len);
    }
    memcpy(&tmp0, bytes, sizeof tmp0);
    s->x0 = to_bit_interleaving_big(tmp0);
  }
}
/*
 * Runs the whole AEAD computation except tag output/verification:
 * initialization from key and nonce, associated-data absorption,
 * plaintext/ciphertext processing (`mode` is ASCON_ENC or ASCON_DEC) and
 * finalization. On return s->x3 and s->x4 hold the tag words.
 *
 * `k` must point to 20 bytes and `npub` to 16 bytes. They are read with
 * memcpy because neither pointer has an alignment guarantee.
 */
void ascon_core(state* s, unsigned char* out, const unsigned char* in,
                unsigned long long tlen, const unsigned char* ad,
                unsigned long long adlen, const unsigned char* npub,
                const unsigned char* k, u8 mode) {
  u32_2 K0, K1, K2, N0, N1;
  u64 w;
  // load key and nonce
  // K0 = key bytes 0..3 (moved to the low 16 bits of each half),
  // K1 = key bytes 4..11, K2 = key bytes 12..19
  memcpy(&w, k, sizeof w);
  K0 = to_bit_interleaving_big(w);
  K0.e >>= 16;
  K0.o >>= 16;
  memcpy(&w, k + 4, sizeof w);
  K1 = to_bit_interleaving_big(w);
  memcpy(&w, k + 12, sizeof w);
  K2 = to_bit_interleaving_big(w);
  memcpy(&w, npub, sizeof w);
  N0 = to_bit_interleaving_big(w);
  memcpy(&w, npub + 8, sizeof w);
  N1 = to_bit_interleaving_big(w);
  // initialization
  to_bit_interleaving_big_immediate(s->x0, IV);
  s->x0.e |= K0.e;
  s->x0.o |= K0.o;
  s->x1.e = K1.e;
  s->x1.o = K1.o;
  s->x2.e = K2.e;
  s->x2.o = K2.o;
  s->x3.e = N0.e;
  s->x3.o = N0.o;
  s->x4.e = N1.e;
  s->x4.o = N1.o;
  P(s, PA_ROUNDS);
  s->x2.e ^= K0.e;
  s->x2.o ^= K0.o;
  s->x3.e ^= K1.e;
  s->x3.o ^= K1.o;
  s->x4.e ^= K2.e;
  s->x4.o ^= K2.o;
  // process associated data
  if (adlen) {
    process_data(s, (void*)0, ad, adlen, ASCON_AD);
    P(s, PB_ROUNDS);
  }
  // domain separation between associated data and message
  s->x4.e ^= 1;
  // process plaintext/ciphertext
  process_data(s, out, in, tlen, mode);
  // finalization
  // xor the 160-bit key into x1, x2 and the upper part of x3
  s->x1.e ^= (K0.e << 16) | (K1.e >> 16);
  s->x1.o ^= (K0.o << 16) | (K1.o >> 16);
  s->x2.e ^= (K1.e << 16) | (K2.e >> 16);
  s->x2.o ^= (K1.o << 16) | (K2.o >> 16);
  s->x3.e ^= (K2.e << 16);
  s->x3.o ^= (K2.o << 16);
  P(s, PA_ROUNDS);
  s->x3.e ^= K1.e;
  s->x3.o ^= K1.o;
  s->x4.e ^= K2.e;
  s->x4.o ^= K2.o;
}
#ifndef CORE_H_
#define CORE_H_
#include "api.h"
#include "endian.h"
#include "permutations.h"
// Values of the `mode` argument of process_data / ascon_core.
#define ASCON_AD 0
#define ASCON_ENC 1
#define ASCON_DEC 2
// Block size in bytes absorbed per permutation call (64 bits).
#define RATE (64 / 8)
// Round counts: PA for initialization/finalization, PB between data blocks.
#define PA_ROUNDS 12
#define PB_ROUNDS 6
// Parameter word: key size in bits, rate in bits, PA and PB round counts,
// one per byte starting at the least-significant byte. ascon_core feeds it
// through to_bit_interleaving_big_immediate, which applies U64BIG first.
#define IV \
((u64)(8 * (CRYPTO_KEYBYTES)) << 0 | (u64)(8 * (RATE)) << 8 | \
(u64)(PA_ROUNDS) << 16 | (u64)(PB_ROUNDS) << 24)
// Absorbs `len` bytes of `in` into the state; writes output to `out` unless
// mode is ASCON_AD.
void process_data(state* s, unsigned char* out, const unsigned char* in,
unsigned long long len, u8 mode);
// Full AEAD core: initialization, associated data, message, finalization.
// Leaves the tag words in s->x3 and s->x4.
void ascon_core(state* s, unsigned char* out, const unsigned char* in,
unsigned long long tlen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k, u8 mode);
#endif // CORE_H_
#include "core.h"
int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen,
unsigned char* nsec, const unsigned char* c,
unsigned long long clen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k) {
if (clen < CRYPTO_ABYTES) {
*mlen = 0;
return -1;
}
state s;
u32_2 t0, t1;
(void)nsec;
// set plaintext size
*mlen = clen - CRYPTO_ABYTES;
ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DEC);
// verify tag (should be constant time, check compiler output)
t0 = to_bit_interleaving_big((*(u64*)(c + *mlen)));
t1 = to_bit_interleaving_big((*(u64*)(c + *mlen + 8)));
if (((s.x3.e ^ t0.e) | (s.x3.o ^ t0.o) | (s.x4.e ^ t1.e) | (s.x4.o ^ t1.o)) !=
0) {
*mlen = 0;
return -1;
}
return 0;
}
#include "core.h"
/*
 * Encrypts `mlen` bytes at `m` into `c` and appends the CRYPTO_ABYTES-byte
 * tag, so `c` must have room for mlen + CRYPTO_ABYTES bytes.
 *
 * Stores the total output length in *clen and always returns 0.
 * `nsec` is unused.
 */
int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen,
                        const unsigned char* m, unsigned long long mlen,
                        const unsigned char* ad, unsigned long long adlen,
                        const unsigned char* nsec, const unsigned char* npub,
                        const unsigned char* k) {
  state s;
  u64 tmp0, tmp1;
  const unsigned char* tag0_bytes = (const unsigned char*)&tmp0;
  const unsigned char* tag1_bytes = (const unsigned char*)&tmp1;
  unsigned char* tag;
  unsigned i;
  (void)nsec;
  // set ciphertext size
  *clen = mlen + CRYPTO_ABYTES;
  ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENC);
  // set tag
  tmp0 = from_bit_interleaving_big(s.x3);
  tmp1 = from_bit_interleaving_big(s.x4);
  // c + mlen has no alignment guarantee, so the tag is stored byte-wise
  // rather than through a u64* cast.
  tag = c + mlen;
  for (i = 0; i < sizeof tmp0; ++i) {
    tag[i] = tag0_bytes[i];
    tag[sizeof tmp0 + i] = tag1_bytes[i];
  }
  return 0;
}
#ifndef ENDIAN_H_
#define ENDIAN_H_
// UxxBIG(x) converts an xx-bit value between host byte order and big-endian
// byte order: identity on big-endian hosts, a byte swap on little-endian
// hosts. The swap macros evaluate their argument several times, so the
// argument must be free of side effects.
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
// macros for big endian machines
#define U64BIG(x) (x)
#define U32BIG(x) (x)
#define U16BIG(x) (x)
#elif defined(_MSC_VER) || \
    (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
// macros for little endian machines
#define U64BIG(x)                                                              \
  ((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) | \
   (((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) |  \
   (((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) |  \
   (((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56))
// The masks are unsigned so that a signed int argument is never left-shifted
// into the sign bit.
#define U32BIG(x)                                           \
  ((((x)&0x000000FFU) << 24) | (((x)&0x0000FF00U) << 8) | \
   (((x)&0x00FF0000U) >> 8) | (((x)&0xFF000000U) >> 24))
#define U16BIG(x) ((((x)&0x00FF) << 8) | (((x)&0xFF00) >> 8))
#else
#error "ascon byte order macros not defined in endian.h"
#endif
#endif  // ENDIAN_H_
Christoph Dobraunig
Martin Schläffer
Ferdinand Bachmann
#include "endian.h"
#include "permutations.h"
/* Round constants, one {C_e, C_o} pair per round index 0..11; P passes the
 * pair for round i to ROUND as its two arguments. */
static const u8 constants[][2] = {
    {0xc, 0xc}, /* round 0 */
    {0x9, 0xc}, /* round 1 */
    {0xc, 0x9}, /* round 2 */
    {0x9, 0x9}, /* round 3 */
    {0x6, 0xc}, /* round 4 */
    {0x3, 0xc}, /* round 5 */
    {0x6, 0x9}, /* round 6 */
    {0x3, 0x9}, /* round 7 */
    {0xc, 0x6}, /* round 8 */
    {0x9, 0x6}, /* round 9 */
    {0xc, 0x3}, /* round 10 */
    {0x9, 0x3}, /* round 11 */
};
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
u32_2 to_bit_interleaving_big(u64 in) {
u32_2 out;
to_bit_interleaving_big_immediate(out, in);
return out;
}
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
u64 from_bit_interleaving_big(u32_2 in) {
u64 out;
from_bit_interleaving_big_immediate(out, in);
return out;
}
/* Applies the last `rounds` rounds (round indices 12 - rounds .. 11) of the
 * permutation to *p in place. */
void P(state *p, u8 rounds) {
  u32_2 t0;      /* scratch required by the ROUND macro */
  state s = *p;  /* ROUND operates on a local named `s` */
  u32 round = START_ROUND(rounds);
  while (round < 12) {
    ROUND(constants[round][0], constants[round][1]);
    ++round;
  }
  *p = s;
}
#ifndef PERMUTATIONS_H_
#define PERMUTATIONS_H_
// Shorthand integer types used throughout the implementation.
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned long long u64;
// One 64-bit state word in bit-interleaved form, as produced by
// to_bit_interleaving_big_immediate (the names suggest even/odd bits).
typedef struct {
u32 e;
u32 o;
} u32_2;
// The permutation state: five 64-bit words x0..x4 (40 bytes).
typedef struct {
u32_2 x0;
u32_2 x1;
u32_2 x2;
u32_2 x3;
u32_2 x4;
} state;
// 32-bit rotate right. n must be in 1..31: n == 0 would shift by 32.
#define ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
// Index of the first round to execute when x rounds out of 12 are wanted.
#define START_ROUND(x) (12 - (x))
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
u32_2 to_bit_interleaving_big(u64 in);
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
u64 from_bit_interleaving_big(u32_2 in);
// Stores into `out` (a u32_2 lvalue) the bit-interleaved form of U64BIG(in).
// Declares locals big_in/hi/lo/r0/r1, so the arguments must not use those
// names.
#define to_bit_interleaving_big_immediate(out, in) \
do { \
u64 big_in = U64BIG(in); \
u32 hi = (big_in) >> 32; \
u32 lo = (u32)(big_in); \
u32 r0, r1; \
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); \
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); \
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); \
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); \
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); \
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); \
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); \
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); \
out.e = (lo & 0x0000FFFF) | (hi << 16); \
out.o = (lo >> 16) | (hi & 0xFFFF0000); \
} while (0)
// Inverse of to_bit_interleaving_big_immediate: `out` (a u64 lvalue)
// receives the de-interleaved word, passed through U64BIG.
// Declares locals lo/hi/r0/r1.
#define from_bit_interleaving_big_immediate(out, in) \
do { \
u32 lo = (in.e & 0x0000FFFF) | (in.o << 16); \
u32 hi = (in.e >> 16) | (in.o & 0xFFFF0000); \
u32 r0, r1; \
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); \
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); \
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); \
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); \
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); \
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); \
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); \
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); \
out = (u64)hi << 32 | lo; \
out = U64BIG(out); \
} while (0)
// One permutation round on the bit-interleaved state. The macro is not
// self-contained: it expects a `state s` and a scratch `u32_2 t0` to be
// declared in the scope where it is expanded. C_e / C_o are the round
// constant parts xored into x2.e / x2.o.
/* clang-format off */
#define ROUND(C_e, C_o) \
do { \
/* round constant */ \
s.x2.e ^= C_e; s.x2.o ^= C_o; \
/* s-box layer */ \
s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \
s.x4.e ^= s.x3.e; s.x4.o ^= s.x3.o; \
s.x2.e ^= s.x1.e; s.x2.o ^= s.x1.o; \
t0.e = s.x0.e & (~s.x4.e); t0.o = s.x0.o & (~s.x4.o); \
s.x0.e ^= s.x2.e & (~s.x1.e); s.x0.o ^= s.x2.o & (~s.x1.o); \
s.x2.e ^= s.x4.e & (~s.x3.e); s.x2.o ^= s.x4.o & (~s.x3.o); \
s.x4.e ^= s.x1.e & (~s.x0.e); s.x4.o ^= s.x1.o & (~s.x0.o); \
s.x1.e ^= s.x3.e & (~s.x2.e); s.x1.o ^= s.x3.o & (~s.x2.o); \
s.x3.e ^= t0.e; s.x3.o ^= t0.o; \
s.x1.e ^= s.x0.e; s.x1.o ^= s.x0.o; \
s.x3.e ^= s.x2.e; s.x3.o ^= s.x2.o; \
s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \
/* linear layer */ \
t0.e = s.x0.e ^ ROTR32(s.x0.o, 4); \
t0.o = s.x0.o ^ ROTR32(s.x0.e, 5); \
s.x0.e ^= ROTR32(t0.o, 9); \
s.x0.o ^= ROTR32(t0.e, 10); \
t0.e = s.x1.e ^ ROTR32(s.x1.e, 11); \
t0.o = s.x1.o ^ ROTR32(s.x1.o, 11); \
s.x1.e ^= ROTR32(t0.o, 19); \
s.x1.o ^= ROTR32(t0.e, 20); \
t0.e = s.x2.e ^ ROTR32(s.x2.o, 2); \
t0.o = s.x2.o ^ ROTR32(s.x2.e, 3); \
s.x2.e ^= t0.o; \
s.x2.o ^= ROTR32(t0.e, 1); \
t0.e = s.x3.e ^ ROTR32(s.x3.o, 3); \
t0.o = s.x3.o ^ ROTR32(s.x3.e, 4); \
s.x3.e ^= ROTR32(t0.e, 5); \
s.x3.o ^= ROTR32(t0.o, 5); \
t0.e = s.x4.e ^ ROTR32(s.x4.e, 17); \
t0.o = s.x4.o ^ ROTR32(s.x4.o, 17); \
s.x4.e ^= ROTR32(t0.o, 3); \
s.x4.o ^= ROTR32(t0.e, 4); \
s.x2.e = ~s.x2.e; s.x2.o = ~s.x2.o; \
} while(0)
/* clang-format on */
// Applies the last `rounds` rounds of the permutation to *p in place.
void P(state *p, u8 rounds);
#endif // PERMUTATIONS_H_
// Sizes, in bytes, of the inputs and outputs of crypto_aead_encrypt/decrypt.
// Key length: 20 bytes = 160 bits (also encoded into IV in core.h).
#define CRYPTO_KEYBYTES 20
// Secret message number length; nsec is ignored by this implementation.
#define CRYPTO_NSECBYTES 0
// Public nonce length; ascon_core reads exactly 16 bytes from npub.
#define CRYPTO_NPUBBYTES 16
// Authentication tag length appended to the ciphertext.
#define CRYPTO_ABYTES 16
// NOTE(review): presumably tells the calling framework that input and output
// buffers must not overlap — confirm against the API convention in use.
#define CRYPTO_NOOVERLAP 1
#include <string.h>
#include "core.h"
/*
 * Absorbs `len` bytes from `in` into the rate word s->x0, RATE bytes at a
 * time, and produces output depending on `mode`:
 *   ASCON_AD  - absorb only; `out` is never touched (it may be null).
 *   ASCON_ENC - out = in XOR state.
 *   ASCON_DEC - out = in XOR state, then the rate word is replaced by `in`.
 * P is applied after every full block. The last, padded (0x80 then zeros)
 * partial block is absorbed without a trailing permutation; the caller runs
 * the next permutation.
 *
 * All 64-bit loads and stores go through memcpy because `in` and `out` have
 * no alignment guarantee and must not be accessed through a u64* cast.
 */
void process_data(state* s, unsigned char* out, const unsigned char* in,
                  unsigned long long len, u8 mode) {
  u32_2 t0;
  u64 tmp0;
  u8 bytes[8];
  while (len >= RATE) {
    memcpy(&tmp0, in, sizeof tmp0);
    t0 = to_big(tmp0);
    s->x0.h ^= t0.h;
    s->x0.l ^= t0.l;
    if (mode != ASCON_AD) {
      tmp0 = from_big(s->x0);
      memcpy(out, &tmp0, sizeof tmp0);
      // only advance `out` when it is a real buffer (it is null in AD mode)
      out += RATE;
    }
    if (mode == ASCON_DEC) {
      s->x0 = t0;
    }
    P(s, START_ROUND(PB_ROUNDS));
    in += RATE;
    len -= RATE;
  }
  // final block: remaining bytes, then 0x80, then zero padding
  memset(bytes, 0, sizeof bytes);
  if (len) {
    memcpy(bytes, in, len);
  }
  bytes[len] ^= 0x80;
  memcpy(&tmp0, bytes, sizeof tmp0);
  t0 = to_big(tmp0);
  s->x0.h ^= t0.h;
  s->x0.l ^= t0.l;
  if (mode != ASCON_AD) {
    tmp0 = from_big(s->x0);
    memcpy(bytes, &tmp0, sizeof bytes);
    if (len) {
      memcpy(out, bytes, len);
    }
  }
  if (mode == ASCON_DEC) {
    // keep the state bytes past `len`, replace the first `len` with `in`
    if (len) {
      memcpy(bytes, in, len);
    }
    memcpy(&tmp0, bytes, sizeof tmp0);
    s->x0 = to_big(tmp0);
  }
}
/*
 * Runs the whole AEAD computation except tag output/verification:
 * initialization from key and nonce, associated-data absorption,
 * plaintext/ciphertext processing (`mode` is ASCON_ENC or ASCON_DEC) and
 * finalization. On return s->x3 and s->x4 hold the tag words.
 *
 * `k` must point to 20 bytes and `npub` to 16 bytes. They are read with
 * memcpy because neither pointer has an alignment guarantee.
 */
void ascon_core(state* s, unsigned char* out, const unsigned char* in,
                unsigned long long tlen, const unsigned char* ad,
                unsigned long long adlen, const unsigned char* npub,
                const unsigned char* k, u8 mode) {
  u32_2 K0, K1, K2, N0, N1;
  u64 w;
  // load key and nonce
  // only K0.h (key bytes 0..3) is used from K0;
  // K1 = key bytes 4..11, K2 = key bytes 12..19
  memcpy(&w, k, sizeof w);
  K0 = to_big(w);
  memcpy(&w, k + 4, sizeof w);
  K1 = to_big(w);
  memcpy(&w, k + 12, sizeof w);
  K2 = to_big(w);
  memcpy(&w, npub, sizeof w);
  N0 = to_big(w);
  memcpy(&w, npub + 8, sizeof w);
  N1 = to_big(w);
  // initialization
  to_big_immediate(s->x0, IV);
  s->x0.l = K0.h;
  s->x1.h = K1.h;
  s->x1.l = K1.l;
  s->x2.h = K2.h;
  s->x2.l = K2.l;
  s->x3.h = N0.h;
  s->x3.l = N0.l;
  s->x4.h = N1.h;
  s->x4.l = N1.l;
  P(s, START_ROUND(PA_ROUNDS));
  s->x2.l ^= K0.h;
  s->x3.h ^= K1.h;
  s->x3.l ^= K1.l;
  s->x4.h ^= K2.h;
  s->x4.l ^= K2.l;
  // process associated data
  if (adlen) {
    process_data(s, (void*)0, ad, adlen, ASCON_AD);
    P(s, START_ROUND(PB_ROUNDS));
  }
  // domain separation between associated data and message
  s->x4.l ^= 1;
  // process plaintext/ciphertext
  process_data(s, out, in, tlen, mode);
  // finalization
  // xor the 160-bit key into x1, x2 and the upper half of x3
  s->x1.h ^= K0.h;
  s->x1.l ^= K1.h;
  s->x2.h ^= K1.l;
  s->x2.l ^= K2.h;
  s->x3.h ^= K2.l;
  P(s, START_ROUND(PA_ROUNDS));
  s->x3.h ^= K1.h;
  s->x3.l ^= K1.l;
  s->x4.h ^= K2.h;
  s->x4.l ^= K2.l;
}
#ifndef CORE_H_
#define CORE_H_
#include "api.h"
#include "endian.h"
#include "permutations.h"
// Values of the `mode` argument of process_data / ascon_core.
#define ASCON_AD 0
#define ASCON_ENC 1
#define ASCON_DEC 2
// Block size in bytes absorbed per permutation call (64 bits).
#define RATE (64 / 8)
// Round counts: PA for initialization/finalization, PB between data blocks.
#define PA_ROUNDS 12
#define PB_ROUNDS 6
// Parameter word: key size in bits, rate in bits, PA and PB round counts,
// one per byte starting at the least-significant byte. ascon_core feeds it
// through to_big_immediate, which applies U64BIG first.
#define IV \
((u64)(8 * (CRYPTO_KEYBYTES)) << 0 | (u64)(8 * (RATE)) << 8 | \
(u64)(PA_ROUNDS) << 16 | (u64)(PB_ROUNDS) << 24)
// Absorbs `len` bytes of `in` into the state; writes output to `out` unless
// mode is ASCON_AD.
void process_data(state* s, unsigned char* out, const unsigned char* in,
unsigned long long len, u8 mode);
// Full AEAD core: initialization, associated data, message, finalization.
// Leaves the tag words in s->x3 and s->x4.
void ascon_core(state* s, unsigned char* out, const unsigned char* in,
unsigned long long tlen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k, u8 mode);
#endif // CORE_H_
#include "core.h"
int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen,
unsigned char* nsec, const unsigned char* c,
unsigned long long clen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k) {
if (clen < CRYPTO_ABYTES) {
*mlen = 0;
return -1;
}
state s;
u32_2 t0, t1;
(void)nsec;
// set plaintext size
*mlen = clen - CRYPTO_ABYTES;
ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DEC);
// verify tag (should be constant time, check compiler output)
t0 = to_big((*(u64*)(c + *mlen)));
t1 = to_big((*(u64*)(c + *mlen + 8)));
if (((s.x3.h ^ t0.h) | (s.x3.l ^ t0.l) | (s.x4.h ^ t1.h) | (s.x4.l ^ t1.l)) !=
0) {
*mlen = 0;
return -1;
}
return 0;
}
#include "core.h"
/*
 * Encrypts `mlen` bytes at `m` into `c` and appends the CRYPTO_ABYTES-byte
 * tag, so `c` must have room for mlen + CRYPTO_ABYTES bytes.
 *
 * Stores the total output length in *clen and always returns 0.
 * `nsec` is unused.
 */
int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen,
                        const unsigned char* m, unsigned long long mlen,
                        const unsigned char* ad, unsigned long long adlen,
                        const unsigned char* nsec, const unsigned char* npub,
                        const unsigned char* k) {
  state s;
  u64 tmp0, tmp1;
  const unsigned char* tag0_bytes = (const unsigned char*)&tmp0;
  const unsigned char* tag1_bytes = (const unsigned char*)&tmp1;
  unsigned char* tag;
  unsigned i;
  (void)nsec;
  // set ciphertext size
  *clen = mlen + CRYPTO_ABYTES;
  ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENC);
  // set tag
  tmp0 = from_big(s.x3);
  tmp1 = from_big(s.x4);
  // c + mlen has no alignment guarantee, so the tag is stored byte-wise
  // rather than through a u64* cast.
  tag = c + mlen;
  for (i = 0; i < sizeof tmp0; ++i) {
    tag[i] = tag0_bytes[i];
    tag[sizeof tmp0 + i] = tag1_bytes[i];
  }
  return 0;
}
#ifndef ENDIAN_H_
#define ENDIAN_H_
// UxxBIG(x) converts an xx-bit value between host byte order and big-endian
// byte order: identity on big-endian hosts, a byte swap on little-endian
// hosts. The swap macros evaluate their argument several times, so the
// argument must be free of side effects.
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
// macros for big endian machines
#define U64BIG(x) (x)
#define U32BIG(x) (x)
#define U16BIG(x) (x)
#elif defined(_MSC_VER) || \
    (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
// macros for little endian machines
#define U64BIG(x)                                                              \
  ((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) | \
   (((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) |  \
   (((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) |  \
   (((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56))
// The masks are unsigned so that a signed int argument is never left-shifted
// into the sign bit.
#define U32BIG(x)                                           \
  ((((x)&0x000000FFU) << 24) | (((x)&0x0000FF00U) << 8) | \
   (((x)&0x00FF0000U) >> 8) | (((x)&0xFF000000U) >> 24))
#define U16BIG(x) ((((x)&0x00FF) << 8) | (((x)&0xFF00) >> 8))
#else
#error "ascon byte order macros not defined in endian.h"
#endif
#endif  // ENDIAN_H_
Christoph Dobraunig
Martin Schläffer
Ferdinand Bachmann
#include "endian.h"
#include "permutations.h"
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
/* Splits a 64-bit word that was loaded in host byte order into the high and
 * low 32-bit halves of its big-endian interpretation. */
u32_2 to_big(u64 in) {
  const u64 be = U64BIG(in);
  u32_2 halves;
  halves.h = (u32)(be >> 32);
  halves.l = (u32)be;
  return halves;
}
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
/* Inverse of to_big: joins the two halves and converts the result back to
 * the representation that stores as big-endian bytes on this host. */
u64 from_big(u32_2 in) {
  u64 word = ((u64)in.h << 32) | in.l;
  return U64BIG(word);
}
/*
void P(state *p, u8 start_round) {
// implemented in asm
}
*/
#ifndef PERMUTATIONS_H_
#define PERMUTATIONS_H_
// Shorthand integer types used throughout the implementation.
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned long long u64;
// One 64-bit state word held as two 32-bit halves: h = upper 32 bits,
// l = lower 32 bits (see to_big_immediate below).
typedef struct {
u32 h;
u32 l;
} u32_2;
// The permutation state: five 64-bit words x0..x4 (40 bytes).
typedef struct {
u32_2 x0;
u32_2 x1;
u32_2 x2;
u32_2 x3;
u32_2 x4;
} state;
// Index of the first round to execute when x rounds out of 12 are wanted.
#define START_ROUND(x) (12 - (x))
// Function forms of the two conversion macros below.
u32_2 to_big(u64 in);
u64 from_big(u32_2 in);
// Stores into `out` (a u32_2 lvalue) the halves of U64BIG(in).
// Declares locals big_in/hi/lo, so the arguments must not use those names.
#define to_big_immediate(out, in) \
do { \
u64 big_in = U64BIG(in); \
u32 hi = (big_in) >> 32; \
u32 lo = (u32)(big_in); \
out.h = hi; \
out.l = lo; \
} while (0)
// Inverse of to_big_immediate: `out` (a u64 lvalue) receives the joined
// halves of `in`, passed through U64BIG. Declares locals hi/lo.
#define from_big_immediate(out, in) \
do { \
u32 hi = in.h; \
u32 lo = in.l; \
out = (u64)hi << 32 | lo; \
out = U64BIG(out); \
} while (0)
// Applies the permutation to *p in place.
// NOTE(review): the parameter is named `rounds`, but callers in this variant
// pass START_ROUND(n) and the assembly implementation appears to use the
// value as the index of the first round — confirm before relying on the name.
void P(state *p, u8 rounds);
#endif // PERMUTATIONS_H_
#include <xtensa/coreasm.h>
/* Register aliases used by P below. */
/* Scratch register.  It aliases a2, which P first uses as the state pointer. */
#define tmp0 a2
/* Round index, taken from a3 and incremented once per round. */
#define rnd a3
/* State words x0..x4, each held as a high/low pair of 32-bit registers. */
#define x0h a4
#define x0l a5
#define x1h a6
#define x1l a7
#define x2h a8
#define x2l a9
#define x3h a10
#define x3l a11
#define x4h a12
#define x4l a13
/* 64-bit temporary of the linear layer (high/low halves). */
#define t0h a14
#define t0l a15
/* S-box temporaries; they share a14/a15 with t0h/t0l. */
#define tmp1 a14
#define tmp2 a15
.section .text
.align 4
.global P
.type P,@function
/*
 * P: applies the permutation rounds in place to the state addressed by a2.
 * a3 (rnd) is the index of the first round; rounds rnd..11 are executed.
 * The loop test sits at the bottom, so the body always runs at least once.
 * NOTE(review): a12-a15 are overwritten and never restored.  That fits the
 * windowed ABI; under CALL0 they would be callee-saved - confirm the ABI
 * this file is built for.
 */
P:
abi_entry 4
/* Load the five state words (high half first) from the state into registers. */
l32i x0h, a2, 0
l32i x0l, a2, 4
l32i x1h, a2, 8
l32i x1l, a2, 12
l32i x2h, a2, 16
l32i x2l, a2, 20
l32i x3h, a2, 24
l32i x3l, a2, 28
l32i x4h, a2, 32
l32i x4l, a2, 36
/* Save the state pointer at 0(a1): a2 is reused as tmp0 inside the loop. */
s32i a2, a1, 0
.Lround_start:
/* round constant */
/* tmp0 = ((15 - rnd) << 4) | rnd, XORed into the low half of x2 */
movi tmp0, 15
sub tmp0, tmp0, rnd
slli tmp0, tmp0, 4
or tmp0, tmp0, rnd
xor x2l, x2l, tmp0
/* s-box layer */
/* tmp0 = all ones, so "xor r, tmp0, v" computes ~v */
movi tmp0, -1
/* high */
xor x0h, x0h, x4h
xor x4h, x4h, x3h
xor x2h, x2h, x1h
/* tmp2 = ~x4h & x0h, kept until it is XORed into x3h below */
xor tmp1, tmp0, x4h
and tmp2, tmp1, x0h
/* x0h ^= ~x1h & x2h */
xor tmp1, tmp0, x1h
and tmp1, tmp1, x2h
xor x0h, x0h, tmp1
/* x2h ^= ~x3h & x4h */
xor tmp1, tmp0, x3h
and tmp1, tmp1, x4h
xor x2h, x2h, tmp1
/* x4h ^= ~x0h & x1h */
xor tmp1, tmp0, x0h
and tmp1, tmp1, x1h
xor x4h, x4h, tmp1
/* x1h ^= ~x2h & x3h */
xor tmp1, tmp0, x2h
and tmp1, tmp1, x3h
xor x1h, x1h, tmp1
xor x3h, x3h, tmp2
xor x1h, x1h, x0h
xor x3h, x3h, x2h
xor x0h, x0h, x4h
/* x2h = ~x2h */
xor x2h, x2h, tmp0
/* low */
/* same sequence as above, applied to the low halves */
xor x0l, x0l, x4l
xor x4l, x4l, x3l
xor x2l, x2l, x1l
xor tmp1, tmp0, x4l
and tmp2, tmp1, x0l
xor tmp1, tmp0, x1l
and tmp1, tmp1, x2l
xor x0l, x0l, tmp1
xor tmp1, tmp0, x3l
and tmp1, tmp1, x4l
xor x2l, x2l, tmp1
xor tmp1, tmp0, x0l
and tmp1, tmp1, x1l
xor x4l, x4l, tmp1
xor tmp1, tmp0, x2l
and tmp1, tmp1, x3l
xor x1l, x1l, tmp1
xor x3l, x3l, tmp2
xor x1l, x1l, x0l
xor x3l, x3l, x2l
xor x0l, x0l, x4l
xor x2l, x2l, tmp0
/* linear layer */
/*
 * Each word x becomes x ^ ror64(x, r1) ^ ror64(x, r2).  "src d, a, b" shifts
 * the 64-bit pair a:b right by the ssai amount and keeps the low 32 bits, so
 * two src instructions with swapped operands give both halves of a rotation.
 * Rotations of 32 or more swap the operand halves and shift by r - 32.
 */
/* x0: rotations 19 and 28 */
ssai 19
src t0l, x0h, x0l
src t0h, x0l, x0h
xor t0l, t0l, x0l
xor t0h, t0h, x0h
ssai 28
src tmp0, x0h, x0l
src x0h, x0l, x0h
xor x0l, tmp0, t0l
xor x0h, x0h, t0h
/* x1: rotations 61 and 39 */
ssai 29 /* inverted 61 */
src t0l, x1l, x1h
src t0h, x1h, x1l
xor t0l, t0l, x1l
xor t0h, t0h, x1h
ssai 7 /* inverted 39 */
src tmp0, x1l, x1h
src x1h, x1h, x1l
xor x1l, tmp0, t0l
xor x1h, x1h, t0h
/* x2: rotations 1 and 6 */
ssai 1
src t0l, x2h, x2l
src t0h, x2l, x2h
xor t0l, t0l, x2l
xor t0h, t0h, x2h
ssai 6
src tmp0, x2h, x2l
src x2h, x2l, x2h
xor x2l, tmp0, t0l
xor x2h, x2h, t0h
/* x3: rotations 10 and 17 */
ssai 10
src t0l, x3h, x3l
src t0h, x3l, x3h
xor t0l, t0l, x3l
xor t0h, t0h, x3h
ssai 17
src tmp0, x3h, x3l
src x3h, x3l, x3h
xor x3l, tmp0, t0l
xor x3h, x3h, t0h
/* x4: rotations 7 and 41 */
ssai 7
src t0l, x4h, x4l
src t0h, x4l, x4h
xor t0l, t0l, x4l
xor t0h, t0h, x4h
ssai 9 /* inverted 41 */
src tmp0, x4l, x4h
src x4h, x4h, x4l
xor x4l, tmp0, t0l
xor x4h, x4h, t0h
/* loop */
/* next round; stop once rnd reaches 12 */
addi rnd, rnd, 1
bltui rnd, 12, .Lround_start
.Lend:
/* Reload the saved state pointer and write the ten halves back. */
l32i a2, a1, 0
s32i x0h, a2, 0
s32i x0l, a2, 4
s32i x1h, a2, 8
s32i x1l, a2, 12
s32i x2h, a2, 16
s32i x2l, a2, 20
s32i x3h, a2, 24
s32i x3l, a2, 28
s32i x4h, a2, 32
s32i x4l, a2, 36
abi_return
#ifndef ENDIAN_H_
#define ENDIAN_H_
/*
 * Byte-order helpers.  UxxBIG(x) converts a value between host byte order
 * and big-endian; the conversion is its own inverse.  On big-endian hosts
 * the macros are the identity, on little-endian hosts they reverse the bytes.
 * _MSC_VER builds are treated as little-endian.
 *
 * The byte-swapping variants evaluate `x` several times, so the argument
 * must be free of side effects.
 */
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
// macros for big endian machines
#define U64BIG(x) (x)
#define U32BIG(x) (x)
#define U16BIG(x) (x)
#elif defined(_MSC_VER) || \
    (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
// macros for little endian machines
#define U64BIG(x) \
  ((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) | \
   (((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) | \
   (((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) | \
   (((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56))
// The masks are unsigned so the shifts are done in unsigned arithmetic: with
// plain int masks, ((x)&0xFF) << 24 overflows a signed int whenever bit 7 of
// an int argument is set.
#define U32BIG(x) \
  ((((x)&0x000000FFU) << 24) | (((x)&0x0000FF00U) << 8) | \
   (((x)&0x00FF0000U) >> 8) | (((x)&0xFF000000U) >> 24))
#define U16BIG(x) ((((x)&0x00FF) << 8) | (((x)&0xFF00) >> 8))
#else
#error "ascon byte order macros not defined in endian.h"
#endif
#endif  // ENDIAN_H_
#include <string.h>
#include "api.h"
#include "endian.h"
#include "permutations.h"
#define RATE (64 / 8)
#define PA_ROUNDS 12
/*
 * Hashes the `inlen` bytes at `in` and writes CRYPTO_BYTES bytes to `out`.
 * Always returns 0.
 *
 * Words are moved between the byte buffers and u64 variables with memcpy()
 * instead of u64 pointer casts: the caller's buffers carry no alignment
 * guarantee, and dereferencing a misaligned or type-punned u64 pointer is
 * undefined behaviour.
 */
int crypto_hash(unsigned char* out, const unsigned char* in,
                unsigned long long inlen) {
  state s;
  u32_2 t0;
  u64 tmp0;
  u64 outlen;
  u8 last[RATE];

  // initialization
  to_bit_interleaving_big_immediate(s.x0, 0x3df067dbaa9893ee);
  to_bit_interleaving_big_immediate(s.x1, 0x02100fc63118b28b);
  to_bit_interleaving_big_immediate(s.x2, 0x62dad598db928ab4);
  to_bit_interleaving_big_immediate(s.x3, 0xe8e3f8b821991843);
  to_bit_interleaving_big_immediate(s.x4, 0x40e125d5c9a58f34);

  // absorb all full RATE-byte blocks
  while (inlen >= RATE) {
    memcpy(&tmp0, in, RATE);
    t0 = to_bit_interleaving_big(tmp0);
    s.x0.e ^= t0.e;
    s.x0.o ^= t0.o;
    P(&s, START_ROUND(PA_ROUNDS));
    inlen -= RATE;
    in += RATE;
  }

  // absorb the final block: the remaining bytes, one 0x80 byte, zero fill
  memset(last, 0, sizeof last);
  if (inlen > 0) {
    memcpy(last, in, inlen);
  }
  last[inlen] = 0x80;
  memcpy(&tmp0, last, RATE);
  t0 = to_bit_interleaving_big(tmp0);
  s.x0.e ^= t0.e;
  s.x0.o ^= t0.o;
  P(&s, START_ROUND(PA_ROUNDS));

  // squeeze hash
  outlen = CRYPTO_BYTES;
  while (outlen >= RATE) {
    tmp0 = from_bit_interleaving_big(s.x0);
    memcpy(out, &tmp0, RATE);
    outlen -= RATE;
    out += RATE;
    // permute only when another block will be extracted; the state is
    // discarded after the last one
    if (outlen >= RATE) {
      P(&s, START_ROUND(PA_ROUNDS));
    }
  }
  return 0;
}
Christoph Dobraunig
Martin Schläffer
Ferdinand Bachmann
#include "endian.h"
#include "permutations.h"
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
/* Function form of to_bit_interleaving_big_immediate(): returns the
 * interleaved representation the macro computes from `in`. */
u32_2 to_bit_interleaving_big(u64 in) {
  u32_2 interleaved;

  to_bit_interleaving_big_immediate(interleaved, in);
  return interleaved;
}
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
/* Function form of from_bit_interleaving_big_immediate(): returns the
 * 64-bit value the macro reconstructs from the interleaved pair `in`. */
u64 from_bit_interleaving_big(u32_2 in) {
  u64 word;

  from_bit_interleaving_big_immediate(word, in);
  return word;
}
/*
void P(state *p, u8 start_round) {
// implemented in asm
}
*/
#ifndef PERMUTATIONS_H_
#define PERMUTATIONS_H_
/*
 * Types and helper macros for the bit-interleaved 32-bit permutation.
 * The conversion macros expand to U64BIG, so "endian.h" must be included
 * before they are used.
 */
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned long long u64;
/* One 64-bit word in bit-interleaved form: `e` collects the even-numbered
 * bits, `o` the odd-numbered bits. */
typedef struct {
  u32 e;
  u32 o;
} u32_2;
/* Permutation state: five interleaved 64-bit words. */
typedef struct {
  u32_2 x0;
  u32_2 x1;
  u32_2 x2;
  u32_2 x3;
  u32_2 x4;
} state;
/* Rotates a 32-bit value right by n bits; n must be in 1..31. */
#define ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
/* Index of the first round when x of the 12 rounds are to be run. */
#define START_ROUND(x) (12 - (x))
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
u32_2 to_bit_interleaving_big(u64 in);
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
u64 from_bit_interleaving_big(u32_2 in);
/*
 * Stores the bit-interleaved form of U64BIG(in) in `out` (a u32_2 lvalue).
 * Each 32-bit half is separated into even and odd bits by four swap steps;
 * the even bits of both halves then form out.e and the odd bits out.o.
 * Arguments are parenthesised and the temporaries carry macro-specific
 * names, so `*p` or caller variables named hi/lo/r0/r1 work as arguments.
 */
#define to_bit_interleaving_big_immediate(out, in) \
  do { \
    const u64 bi_w_ = U64BIG(in); \
    u32 bi_hi_ = (u32)(bi_w_ >> 32); \
    u32 bi_lo_ = (u32)(bi_w_); \
    u32 bi_t_; \
    bi_t_ = (bi_lo_ ^ (bi_lo_ >> 1)) & 0x22222222, bi_lo_ ^= bi_t_ ^ (bi_t_ << 1); \
    bi_t_ = (bi_lo_ ^ (bi_lo_ >> 2)) & 0x0C0C0C0C, bi_lo_ ^= bi_t_ ^ (bi_t_ << 2); \
    bi_t_ = (bi_lo_ ^ (bi_lo_ >> 4)) & 0x00F000F0, bi_lo_ ^= bi_t_ ^ (bi_t_ << 4); \
    bi_t_ = (bi_lo_ ^ (bi_lo_ >> 8)) & 0x0000FF00, bi_lo_ ^= bi_t_ ^ (bi_t_ << 8); \
    bi_t_ = (bi_hi_ ^ (bi_hi_ >> 1)) & 0x22222222, bi_hi_ ^= bi_t_ ^ (bi_t_ << 1); \
    bi_t_ = (bi_hi_ ^ (bi_hi_ >> 2)) & 0x0C0C0C0C, bi_hi_ ^= bi_t_ ^ (bi_t_ << 2); \
    bi_t_ = (bi_hi_ ^ (bi_hi_ >> 4)) & 0x00F000F0, bi_hi_ ^= bi_t_ ^ (bi_t_ << 4); \
    bi_t_ = (bi_hi_ ^ (bi_hi_ >> 8)) & 0x0000FF00, bi_hi_ ^= bi_t_ ^ (bi_t_ << 8); \
    (out).e = (bi_lo_ & 0x0000FFFF) | (bi_hi_ << 16); \
    (out).o = (bi_lo_ >> 16) | (bi_hi_ & 0xFFFF0000); \
  } while (0)
/*
 * Inverse of the macro above: runs the same swap steps in reverse order and
 * assigns U64BIG of the reassembled word to `out` (a u64 lvalue).  `in` must
 * be a u32_2 expression.
 */
#define from_bit_interleaving_big_immediate(out, in) \
  do { \
    const u32_2 bi_src_ = (in); \
    u32 bi_lo_ = (bi_src_.e & 0x0000FFFF) | (bi_src_.o << 16); \
    u32 bi_hi_ = (bi_src_.e >> 16) | (bi_src_.o & 0xFFFF0000); \
    u32 bi_t_; \
    u64 bi_w_; \
    bi_t_ = (bi_lo_ ^ (bi_lo_ >> 8)) & 0x0000FF00, bi_lo_ ^= bi_t_ ^ (bi_t_ << 8); \
    bi_t_ = (bi_lo_ ^ (bi_lo_ >> 4)) & 0x00F000F0, bi_lo_ ^= bi_t_ ^ (bi_t_ << 4); \
    bi_t_ = (bi_lo_ ^ (bi_lo_ >> 2)) & 0x0C0C0C0C, bi_lo_ ^= bi_t_ ^ (bi_t_ << 2); \
    bi_t_ = (bi_lo_ ^ (bi_lo_ >> 1)) & 0x22222222, bi_lo_ ^= bi_t_ ^ (bi_t_ << 1); \
    bi_t_ = (bi_hi_ ^ (bi_hi_ >> 8)) & 0x0000FF00, bi_hi_ ^= bi_t_ ^ (bi_t_ << 8); \
    bi_t_ = (bi_hi_ ^ (bi_hi_ >> 4)) & 0x00F000F0, bi_hi_ ^= bi_t_ ^ (bi_t_ << 4); \
    bi_t_ = (bi_hi_ ^ (bi_hi_ >> 2)) & 0x0C0C0C0C, bi_hi_ ^= bi_t_ ^ (bi_t_ << 2); \
    bi_t_ = (bi_hi_ ^ (bi_hi_ >> 1)) & 0x22222222, bi_hi_ ^= bi_t_ ^ (bi_t_ << 1); \
    bi_w_ = ((u64)bi_hi_ << 32) | bi_lo_; \
    (out) = U64BIG(bi_w_); \
  } while (0)
/*
 * One permutation round with the even/odd round-constant halves C_e, C_o.
 * The macro works on variables named `s` (state) and `t0` (u32_2) that must
 * exist in the invoking scope.
 */
/* clang-format off */
#define ROUND(C_e, C_o) \
  do { \
    /* round constant */ \
    s.x2.e ^= C_e; s.x2.o ^= C_o; \
    /* s-box layer */ \
    s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \
    s.x4.e ^= s.x3.e; s.x4.o ^= s.x3.o; \
    s.x2.e ^= s.x1.e; s.x2.o ^= s.x1.o; \
    t0.e = s.x0.e & (~s.x4.e); t0.o = s.x0.o & (~s.x4.o); \
    s.x0.e ^= s.x2.e & (~s.x1.e); s.x0.o ^= s.x2.o & (~s.x1.o); \
    s.x2.e ^= s.x4.e & (~s.x3.e); s.x2.o ^= s.x4.o & (~s.x3.o); \
    s.x4.e ^= s.x1.e & (~s.x0.e); s.x4.o ^= s.x1.o & (~s.x0.o); \
    s.x1.e ^= s.x3.e & (~s.x2.e); s.x1.o ^= s.x3.o & (~s.x2.o); \
    s.x3.e ^= t0.e; s.x3.o ^= t0.o; \
    s.x1.e ^= s.x0.e; s.x1.o ^= s.x0.o; \
    s.x3.e ^= s.x2.e; s.x3.o ^= s.x2.o; \
    s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \
    /* linear layer */ \
    t0.e = s.x0.e ^ ROTR32(s.x0.o, 4); \
    t0.o = s.x0.o ^ ROTR32(s.x0.e, 5); \
    s.x0.e ^= ROTR32(t0.o, 9); \
    s.x0.o ^= ROTR32(t0.e, 10); \
    t0.e = s.x1.e ^ ROTR32(s.x1.e, 11); \
    t0.o = s.x1.o ^ ROTR32(s.x1.o, 11); \
    s.x1.e ^= ROTR32(t0.o, 19); \
    s.x1.o ^= ROTR32(t0.e, 20); \
    t0.e = s.x2.e ^ ROTR32(s.x2.o, 2); \
    t0.o = s.x2.o ^ ROTR32(s.x2.e, 3); \
    s.x2.e ^= t0.o; \
    s.x2.o ^= ROTR32(t0.e, 1); \
    t0.e = s.x3.e ^ ROTR32(s.x3.o, 3); \
    t0.o = s.x3.o ^ ROTR32(s.x3.e, 4); \
    s.x3.e ^= ROTR32(t0.e, 5); \
    s.x3.o ^= ROTR32(t0.o, 5); \
    t0.e = s.x4.e ^ ROTR32(s.x4.e, 17); \
    t0.o = s.x4.o ^ ROTR32(s.x4.o, 17); \
    s.x4.e ^= ROTR32(t0.o, 3); \
    s.x4.o ^= ROTR32(t0.e, 4); \
    s.x2.e = ~s.x2.e; s.x2.o = ~s.x2.o; \
  } while(0)
/* clang-format on */
/*
 * Permutes *p in place.  The assembly implementation uses the second
 * argument as the index of the first round to execute (callers pass
 * START_ROUND(n)), hence the parameter name.
 */
void P(state *p, u8 start_round);
#endif  // PERMUTATIONS_H_
#include <xtensa/coreasm.h>
/*
 * Round-constant table read by P: one pair of bytes per round, 12 rounds.
 * P XORs the first byte of a pair into x2e and the second into x2o, and
 * locates the pair for round r at byte offset 2 * r.
 */
.section .data
.align 2
.global ascon_round_constants
.type ascon_round_constants,@object
ascon_round_constants:
.byte 0xc, 0xc
.byte 0x9, 0xc
.byte 0xc, 0x9
.byte 0x9, 0x9
.byte 0x6, 0xc
.byte 0x3, 0xc
.byte 0x6, 0x9
.byte 0x3, 0x9
.byte 0xc, 0x6
.byte 0x9, 0x6
.byte 0xc, 0x3
.byte 0x9, 0x3
/* Register aliases used by P below. */
/* State words x0..x4, each held as an even/odd pair of 32-bit registers. */
#define x0e a4
#define x0o a5
#define x1e a6
#define x1o a7
#define x2e a8
#define x2o a9
#define x3e a10
#define x3o a11
#define x4e a12
#define x4o a13
/* Temporary pair used for the round constant and the linear layer. */
#define t0e a14
#define t0o a15
/* S-box temporaries; they share a14/a15 with t0e/t0o. */
#define tmp a14
#define neg a15
.section .text
.align 4
.global P
.type P,@function
/*
 * P: applies the permutation rounds in place to the state addressed by a2.
 * a3 is the index of the first round; rounds a3..11 are executed.  The loop
 * test sits at the bottom, so the body always runs at least once.
 * NOTE(review): a12-a15 are overwritten and never restored.  That fits the
 * windowed ABI; under CALL0 they would be callee-saved - confirm the ABI
 * this file is built for.
 */
P:
abi_entry 4
/* Load the five state words (even half first) from the state into registers. */
l32i x0e, a2, 0
l32i x0o, a2, 4
l32i x1e, a2, 8
l32i x1o, a2, 12
l32i x2e, a2, 16
l32i x2o, a2, 20
l32i x3e, a2, 24
l32i x3o, a2, 28
l32i x4e, a2, 32
l32i x4o, a2, 36
/* Save the state pointer at 0(a1); a2 becomes the round-constant cursor. */
s32i a2, a1, 0
/* a2 = &ascon_round_constants[2 * a3] */
movi a2, ascon_round_constants
addx2 a2, a3, a2
.Lround_start:
/* round constant */
/* a14/a15 are t0e/t0o: the two constant bytes go into x2e and x2o */
l8ui t0e, a2, 0
xor x2e, x2e, a14
l8ui t0o, a2, 1
xor x2o, x2o, a15
/* s-box layer */
/* neg is reloaded with all ones before each use, so "xor neg, neg, v" is ~v */
/* even */
xor x0e, x0e, x4e
xor x4e, x4e, x3e
xor x2e, x2e, x1e
/* tmp = ~x4e & x0e, kept until it is XORed into x3e below */
movi neg, -1
xor neg, neg, x4e
and tmp, neg, x0e
/* x0e ^= ~x1e & x2e */
movi neg, -1
xor neg, neg, x1e
and neg, neg, x2e
xor x0e, x0e, neg
/* x2e ^= ~x3e & x4e */
movi neg, -1
xor neg, neg, x3e
and neg, neg, x4e
xor x2e, x2e, neg
/* x4e ^= ~x0e & x1e */
movi neg, -1
xor neg, neg, x0e
and neg, neg, x1e
xor x4e, x4e, neg
/* x1e ^= ~x2e & x3e */
movi neg, -1
xor neg, neg, x2e
and neg, neg, x3e
xor x1e, x1e, neg
xor x3e, x3e, tmp
xor x1e, x1e, x0e
xor x3e, x3e, x2e
xor x0e, x0e, x4e
/* odd */
/* same sequence as above, applied to the odd halves */
xor x0o, x0o, x4o
xor x4o, x4o, x3o
xor x2o, x2o, x1o
movi neg, -1
xor neg, neg, x4o
and tmp, neg, x0o
movi neg, -1
xor neg, neg, x1o
and neg, neg, x2o
xor x0o, x0o, neg
movi neg, -1
xor neg, neg, x3o
and neg, neg, x4o
xor x2o, x2o, neg
movi neg, -1
xor neg, neg, x0o
and neg, neg, x1o
xor x4o, x4o, neg
movi neg, -1
xor neg, neg, x2o
and neg, neg, x3o
xor x1o, x1o, neg
xor x3o, x3o, tmp
xor x1o, x1o, x0o
xor x3o, x3o, x2o
xor x0o, x0o, x4o
/* linear layer */
/* "src d, v, v" with both sources equal rotates v right by the ssai amount */
/* x0: t0e = x0e ^ ror(x0o, 4), t0o = x0o ^ ror(x0e, 5) */
ssai 4
src t0e, x0o, x0o
xor t0e, t0e, x0e
ssai 5
src t0o, x0e, x0e
xor t0o, t0o, x0o
/* x0e ^= ror(t0o, 9), x0o ^= ror(t0e, 10) */
ssai 9
src t0o, t0o, t0o
xor x0e, x0e, t0o
ssai 10
src t0e, t0e, t0e
xor x0o, x0o, t0e
/* x1: t0e = x1e ^ ror(x1e, 11), t0o = x1o ^ ror(x1o, 11) */
ssai 11
src t0e, x1e, x1e
xor t0e, t0e, x1e
src t0o, x1o, x1o
xor t0o, t0o, x1o
/* x1e ^= ror(t0o, 19), x1o ^= ror(t0e, 20) */
ssai 19
src t0o, t0o, t0o
xor x1e, x1e, t0o
ssai 20
src t0e, t0e, t0e
xor x1o, x1o, t0e
/* x2: t0e = x2e ^ ror(x2o, 2), t0o = x2o ^ ror(x2e, 3) */
ssai 2
src t0e, x2o, x2o
xor t0e, t0e, x2e
ssai 3
src t0o, x2e, x2e
xor t0o, t0o, x2o
/* x2e ^= t0o (no rotation), x2o ^= ror(t0e, 1) */
xor x2e, x2e, t0o
ssai 1
src t0e, t0e, t0e
xor x2o, x2o, t0e
/* x3: t0e = x3e ^ ror(x3o, 3), t0o = x3o ^ ror(x3e, 4) */
ssai 3
src t0e, x3o, x3o
xor t0e, t0e, x3e
ssai 4
src t0o, x3e, x3e
xor t0o, t0o, x3o
/* x3e ^= ror(t0e, 5), x3o ^= ror(t0o, 5) */
ssai 5
src t0e, t0e, t0e
xor x3e, x3e, t0e
src t0o, t0o, t0o
xor x3o, x3o, t0o
/* x4: t0e = x4e ^ ror(x4e, 17), t0o = x4o ^ ror(x4o, 17) */
ssai 17
src t0e, x4e, x4e
xor t0e, t0e, x4e
src t0o, x4o, x4o
xor t0o, t0o, x4o
/* x4e ^= ror(t0o, 3), x4o ^= ror(t0e, 4) */
ssai 3
src t0o, t0o, t0o
xor x4e, x4e, t0o
ssai 4
src t0e, t0e, t0e
xor x4o, x4o, t0e
/* x2 = ~x2 (both halves) */
movi neg, -1
xor x2e, x2e, neg
xor x2o, x2o, neg
/* loop */
/* advance to the next constant pair and round; stop once a3 reaches 12 */
addi a2, a2, 2
addi a3, a3, 1
bltui a3, 12, .Lround_start
.Lend:
/* Reload the saved state pointer and write the ten halves back. */
l32i a2, a1, 0
s32i x0e, a2, 0
s32i x0o, a2, 4
s32i x1e, a2, 8
s32i x1o, a2, 12
s32i x2e, a2, 16
s32i x2o, a2, 20
s32i x3e, a2, 24
s32i x3o, a2, 28
s32i x4e, a2, 32
s32i x4o, a2, 36
abi_return
#ifndef ENDIAN_H_
#define ENDIAN_H_
/*
 * Byte-order helpers.  UxxBIG(x) converts a value between host byte order
 * and big-endian; the conversion is its own inverse.  On big-endian hosts
 * the macros are the identity, on little-endian hosts they reverse the bytes.
 * _MSC_VER builds are treated as little-endian.
 *
 * The byte-swapping variants evaluate `x` several times, so the argument
 * must be free of side effects.
 */
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
// macros for big endian machines
#define U64BIG(x) (x)
#define U32BIG(x) (x)
#define U16BIG(x) (x)
#elif defined(_MSC_VER) || \
    (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
// macros for little endian machines
#define U64BIG(x) \
  ((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) | \
   (((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) | \
   (((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) | \
   (((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56))
// The masks are unsigned so the shifts are done in unsigned arithmetic: with
// plain int masks, ((x)&0xFF) << 24 overflows a signed int whenever bit 7 of
// an int argument is set.
#define U32BIG(x) \
  ((((x)&0x000000FFU) << 24) | (((x)&0x0000FF00U) << 8) | \
   (((x)&0x00FF0000U) >> 8) | (((x)&0xFF000000U) >> 24))
#define U16BIG(x) ((((x)&0x00FF) << 8) | (((x)&0xFF00) >> 8))
#else
#error "ascon byte order macros not defined in endian.h"
#endif
#endif  // ENDIAN_H_
#include <string.h>
#include "api.h"
#include "endian.h"
#include "permutations.h"
#define RATE (64 / 8)
#define PA_ROUNDS 12
/*
 * Hashes the `inlen` bytes at `in` and writes CRYPTO_BYTES bytes to `out`.
 * Always returns 0.
 *
 * Words are moved between the byte buffers and u64 variables with memcpy()
 * instead of u64 pointer casts: the caller's buffers carry no alignment
 * guarantee, and dereferencing a misaligned or type-punned u64 pointer is
 * undefined behaviour.
 */
int crypto_hash(unsigned char* out, const unsigned char* in,
                unsigned long long inlen) {
  state s;
  u32_2 t0;
  u64 tmp0;
  u64 outlen;
  u8 last[RATE];

  // initialization
  to_bit_interleaving_big_immediate(s.x0, 0x3df067dbaa9893ee);
  to_bit_interleaving_big_immediate(s.x1, 0x02100fc63118b28b);
  to_bit_interleaving_big_immediate(s.x2, 0x62dad598db928ab4);
  to_bit_interleaving_big_immediate(s.x3, 0xe8e3f8b821991843);
  to_bit_interleaving_big_immediate(s.x4, 0x40e125d5c9a58f34);

  // absorb all full RATE-byte blocks
  while (inlen >= RATE) {
    memcpy(&tmp0, in, RATE);
    t0 = to_bit_interleaving_big(tmp0);
    s.x0.e ^= t0.e;
    s.x0.o ^= t0.o;
    P(&s, PA_ROUNDS);
    inlen -= RATE;
    in += RATE;
  }

  // absorb the final block: the remaining bytes, one 0x80 byte, zero fill
  memset(last, 0, sizeof last);
  if (inlen > 0) {
    memcpy(last, in, inlen);
  }
  last[inlen] = 0x80;
  memcpy(&tmp0, last, RATE);
  t0 = to_bit_interleaving_big(tmp0);
  s.x0.e ^= t0.e;
  s.x0.o ^= t0.o;
  P(&s, PA_ROUNDS);

  // squeeze hash
  outlen = CRYPTO_BYTES;
  while (outlen >= RATE) {
    tmp0 = from_bit_interleaving_big(s.x0);
    memcpy(out, &tmp0, RATE);
    outlen -= RATE;
    out += RATE;
    // permute only when another block will be extracted; the state is
    // discarded after the last one
    if (outlen >= RATE) {
      P(&s, PA_ROUNDS);
    }
  }
  return 0;
}
Christoph Dobraunig
Martin Schläffer
Ferdinand Bachmann
#include "endian.h"
#include "permutations.h"
/* Round constants, one {C_e, C_o} pair per round; P() passes constants[i]
 * to ROUND() for round index i (0..11). */
static const u8 constants[][2] = {
    {0xc, 0xc},
    {0x9, 0xc},
    {0xc, 0x9},
    {0x9, 0x9},
    {0x6, 0xc},
    {0x3, 0xc},
    {0x6, 0x9},
    {0x3, 0x9},
    {0xc, 0x6},
    {0x9, 0x6},
    {0xc, 0x3},
    {0x9, 0x3},
};
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
/* Function form of to_bit_interleaving_big_immediate(): returns the
 * interleaved representation the macro computes from `in`. */
u32_2 to_bit_interleaving_big(u64 in) {
  u32_2 interleaved;

  to_bit_interleaving_big_immediate(interleaved, in);
  return interleaved;
}
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
/* Function form of from_bit_interleaving_big_immediate(): returns the
 * 64-bit value the macro reconstructs from the interleaved pair `in`. */
u64 from_bit_interleaving_big(u32_2 in) {
  u64 word;

  from_bit_interleaving_big_immediate(word, in);
  return word;
}
/*
 * Applies the last `rounds` of the 12 permutation rounds to *p: round
 * indices START_ROUND(rounds)..11 run on a local copy that is written back
 * at the end.
 */
void P(state *p, u8 rounds) {
  /* ROUND() refers to these two locals by name; keep them called s and t0. */
  state s = *p;
  u32_2 t0;
  u32 i = START_ROUND(rounds);

  while (i < 12) {
    ROUND(constants[i][0], constants[i][1]);
    i++;
  }
  *p = s;
}
#ifndef PERMUTATIONS_H_
#define PERMUTATIONS_H_
/*
 * Types and helper macros for the bit-interleaved 32-bit permutation.
 * The conversion macros expand to U64BIG, so "endian.h" must be included
 * before they are used.
 */
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned long long u64;
/* One 64-bit word in bit-interleaved form: `e` collects the even-numbered
 * bits, `o` the odd-numbered bits. */
typedef struct {
  u32 e;
  u32 o;
} u32_2;
/* Permutation state: five interleaved 64-bit words. */
typedef struct {
  u32_2 x0;
  u32_2 x1;
  u32_2 x2;
  u32_2 x3;
  u32_2 x4;
} state;
/* Rotates a 32-bit value right by n bits; n must be in 1..31. */
#define ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
/* Index of the first round when x of the 12 rounds are to be run. */
#define START_ROUND(x) (12 - (x))
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
u32_2 to_bit_interleaving_big(u64 in);
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
u64 from_bit_interleaving_big(u32_2 in);
/*
 * Stores the bit-interleaved form of U64BIG(in) in `out` (a u32_2 lvalue).
 * Each 32-bit half is separated into even and odd bits by four swap steps;
 * the even bits of both halves then form out.e and the odd bits out.o.
 * Arguments are parenthesised and the temporaries carry macro-specific
 * names, so `*p` or caller variables named hi/lo/r0/r1 work as arguments.
 */
#define to_bit_interleaving_big_immediate(out, in) \
  do { \
    const u64 bi_w_ = U64BIG(in); \
    u32 bi_hi_ = (u32)(bi_w_ >> 32); \
    u32 bi_lo_ = (u32)(bi_w_); \
    u32 bi_t_; \
    bi_t_ = (bi_lo_ ^ (bi_lo_ >> 1)) & 0x22222222, bi_lo_ ^= bi_t_ ^ (bi_t_ << 1); \
    bi_t_ = (bi_lo_ ^ (bi_lo_ >> 2)) & 0x0C0C0C0C, bi_lo_ ^= bi_t_ ^ (bi_t_ << 2); \
    bi_t_ = (bi_lo_ ^ (bi_lo_ >> 4)) & 0x00F000F0, bi_lo_ ^= bi_t_ ^ (bi_t_ << 4); \
    bi_t_ = (bi_lo_ ^ (bi_lo_ >> 8)) & 0x0000FF00, bi_lo_ ^= bi_t_ ^ (bi_t_ << 8); \
    bi_t_ = (bi_hi_ ^ (bi_hi_ >> 1)) & 0x22222222, bi_hi_ ^= bi_t_ ^ (bi_t_ << 1); \
    bi_t_ = (bi_hi_ ^ (bi_hi_ >> 2)) & 0x0C0C0C0C, bi_hi_ ^= bi_t_ ^ (bi_t_ << 2); \
    bi_t_ = (bi_hi_ ^ (bi_hi_ >> 4)) & 0x00F000F0, bi_hi_ ^= bi_t_ ^ (bi_t_ << 4); \
    bi_t_ = (bi_hi_ ^ (bi_hi_ >> 8)) & 0x0000FF00, bi_hi_ ^= bi_t_ ^ (bi_t_ << 8); \
    (out).e = (bi_lo_ & 0x0000FFFF) | (bi_hi_ << 16); \
    (out).o = (bi_lo_ >> 16) | (bi_hi_ & 0xFFFF0000); \
  } while (0)
/*
 * Inverse of the macro above: runs the same swap steps in reverse order and
 * assigns U64BIG of the reassembled word to `out` (a u64 lvalue).  `in` must
 * be a u32_2 expression.
 */
#define from_bit_interleaving_big_immediate(out, in) \
  do { \
    const u32_2 bi_src_ = (in); \
    u32 bi_lo_ = (bi_src_.e & 0x0000FFFF) | (bi_src_.o << 16); \
    u32 bi_hi_ = (bi_src_.e >> 16) | (bi_src_.o & 0xFFFF0000); \
    u32 bi_t_; \
    u64 bi_w_; \
    bi_t_ = (bi_lo_ ^ (bi_lo_ >> 8)) & 0x0000FF00, bi_lo_ ^= bi_t_ ^ (bi_t_ << 8); \
    bi_t_ = (bi_lo_ ^ (bi_lo_ >> 4)) & 0x00F000F0, bi_lo_ ^= bi_t_ ^ (bi_t_ << 4); \
    bi_t_ = (bi_lo_ ^ (bi_lo_ >> 2)) & 0x0C0C0C0C, bi_lo_ ^= bi_t_ ^ (bi_t_ << 2); \
    bi_t_ = (bi_lo_ ^ (bi_lo_ >> 1)) & 0x22222222, bi_lo_ ^= bi_t_ ^ (bi_t_ << 1); \
    bi_t_ = (bi_hi_ ^ (bi_hi_ >> 8)) & 0x0000FF00, bi_hi_ ^= bi_t_ ^ (bi_t_ << 8); \
    bi_t_ = (bi_hi_ ^ (bi_hi_ >> 4)) & 0x00F000F0, bi_hi_ ^= bi_t_ ^ (bi_t_ << 4); \
    bi_t_ = (bi_hi_ ^ (bi_hi_ >> 2)) & 0x0C0C0C0C, bi_hi_ ^= bi_t_ ^ (bi_t_ << 2); \
    bi_t_ = (bi_hi_ ^ (bi_hi_ >> 1)) & 0x22222222, bi_hi_ ^= bi_t_ ^ (bi_t_ << 1); \
    bi_w_ = ((u64)bi_hi_ << 32) | bi_lo_; \
    (out) = U64BIG(bi_w_); \
  } while (0)
/*
 * One permutation round with the even/odd round-constant halves C_e, C_o.
 * The macro works on variables named `s` (state) and `t0` (u32_2) that must
 * exist in the invoking scope.
 */
/* clang-format off */
#define ROUND(C_e, C_o) \
  do { \
    /* round constant */ \
    s.x2.e ^= C_e; s.x2.o ^= C_o; \
    /* s-box layer */ \
    s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \
    s.x4.e ^= s.x3.e; s.x4.o ^= s.x3.o; \
    s.x2.e ^= s.x1.e; s.x2.o ^= s.x1.o; \
    t0.e = s.x0.e & (~s.x4.e); t0.o = s.x0.o & (~s.x4.o); \
    s.x0.e ^= s.x2.e & (~s.x1.e); s.x0.o ^= s.x2.o & (~s.x1.o); \
    s.x2.e ^= s.x4.e & (~s.x3.e); s.x2.o ^= s.x4.o & (~s.x3.o); \
    s.x4.e ^= s.x1.e & (~s.x0.e); s.x4.o ^= s.x1.o & (~s.x0.o); \
    s.x1.e ^= s.x3.e & (~s.x2.e); s.x1.o ^= s.x3.o & (~s.x2.o); \
    s.x3.e ^= t0.e; s.x3.o ^= t0.o; \
    s.x1.e ^= s.x0.e; s.x1.o ^= s.x0.o; \
    s.x3.e ^= s.x2.e; s.x3.o ^= s.x2.o; \
    s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \
    /* linear layer */ \
    t0.e = s.x0.e ^ ROTR32(s.x0.o, 4); \
    t0.o = s.x0.o ^ ROTR32(s.x0.e, 5); \
    s.x0.e ^= ROTR32(t0.o, 9); \
    s.x0.o ^= ROTR32(t0.e, 10); \
    t0.e = s.x1.e ^ ROTR32(s.x1.e, 11); \
    t0.o = s.x1.o ^ ROTR32(s.x1.o, 11); \
    s.x1.e ^= ROTR32(t0.o, 19); \
    s.x1.o ^= ROTR32(t0.e, 20); \
    t0.e = s.x2.e ^ ROTR32(s.x2.o, 2); \
    t0.o = s.x2.o ^ ROTR32(s.x2.e, 3); \
    s.x2.e ^= t0.o; \
    s.x2.o ^= ROTR32(t0.e, 1); \
    t0.e = s.x3.e ^ ROTR32(s.x3.o, 3); \
    t0.o = s.x3.o ^ ROTR32(s.x3.e, 4); \
    s.x3.e ^= ROTR32(t0.e, 5); \
    s.x3.o ^= ROTR32(t0.o, 5); \
    t0.e = s.x4.e ^ ROTR32(s.x4.e, 17); \
    t0.o = s.x4.o ^ ROTR32(s.x4.o, 17); \
    s.x4.e ^= ROTR32(t0.o, 3); \
    s.x4.o ^= ROTR32(t0.e, 4); \
    s.x2.e = ~s.x2.e; s.x2.o = ~s.x2.o; \
  } while(0)
/* clang-format on */
/*
 * Permutes *p in place, running the last `rounds` of the 12 rounds (the C
 * implementation starts at round index START_ROUND(rounds)).
 */
void P(state *p, u8 rounds);
#endif  // PERMUTATIONS_H_
#ifndef ENDIAN_H_
#define ENDIAN_H_
/*
 * Byte-order helpers.  UxxBIG(x) converts a value between host byte order
 * and big-endian; the conversion is its own inverse.  On big-endian hosts
 * the macros are the identity, on little-endian hosts they reverse the bytes.
 * _MSC_VER builds are treated as little-endian.
 *
 * The byte-swapping variants evaluate `x` several times, so the argument
 * must be free of side effects.
 */
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
// macros for big endian machines
#define U64BIG(x) (x)
#define U32BIG(x) (x)
#define U16BIG(x) (x)
#elif defined(_MSC_VER) || \
    (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
// macros for little endian machines
#define U64BIG(x) \
  ((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) | \
   (((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) | \
   (((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) | \
   (((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56))
// The masks are unsigned so the shifts are done in unsigned arithmetic: with
// plain int masks, ((x)&0xFF) << 24 overflows a signed int whenever bit 7 of
// an int argument is set.
#define U32BIG(x) \
  ((((x)&0x000000FFU) << 24) | (((x)&0x0000FF00U) << 8) | \
   (((x)&0x00FF0000U) >> 8) | (((x)&0xFF000000U) >> 24))
#define U16BIG(x) ((((x)&0x00FF) << 8) | (((x)&0xFF00) >> 8))
#else
#error "ascon byte order macros not defined in endian.h"
#endif
#endif  // ENDIAN_H_
#include <string.h>
#include "api.h"
#include "endian.h"
#include "permutations.h"
#define RATE (64 / 8)
#define PA_ROUNDS 12
/*
 * Hashes the `inlen` bytes at `in` and writes CRYPTO_BYTES bytes to `out`.
 * Always returns 0.
 *
 * Words are moved between the byte buffers and u64 variables with memcpy()
 * instead of u64 pointer casts: the caller's buffers carry no alignment
 * guarantee, and dereferencing a misaligned or type-punned u64 pointer is
 * undefined behaviour.
 */
int crypto_hash(unsigned char* out, const unsigned char* in,
                unsigned long long inlen) {
  state s;
  u32_2 t0;
  u64 tmp0;
  u64 outlen;
  u8 last[RATE];

  // initialization
  s.x0.h = 0xee9398aa; s.x0.l = 0xdb67f03d;
  s.x1.h = 0x8bb21831; s.x1.l = 0xc60f1002;
  s.x2.h = 0xb48a92db; s.x2.l = 0x98d5da62;
  s.x3.h = 0x43189921; s.x3.l = 0xb8f8e3e8;
  s.x4.h = 0x348fa5c9; s.x4.l = 0xd525e140;

  // absorb all full RATE-byte blocks
  while (inlen >= RATE) {
    memcpy(&tmp0, in, RATE);
    t0 = to_big(tmp0);
    s.x0.h ^= t0.h;
    s.x0.l ^= t0.l;
    P(&s, START_ROUND(PA_ROUNDS));
    inlen -= RATE;
    in += RATE;
  }

  // absorb the final block: the remaining bytes, one 0x80 byte, zero fill
  memset(last, 0, sizeof last);
  if (inlen > 0) {
    memcpy(last, in, inlen);
  }
  last[inlen] = 0x80;
  memcpy(&tmp0, last, RATE);
  t0 = to_big(tmp0);
  s.x0.h ^= t0.h;
  s.x0.l ^= t0.l;
  P(&s, START_ROUND(PA_ROUNDS));

  // squeeze hash
  outlen = CRYPTO_BYTES;
  while (outlen >= RATE) {
    tmp0 = from_big(s.x0);
    memcpy(out, &tmp0, RATE);
    outlen -= RATE;
    out += RATE;
    // permute only when another block will be extracted; the state is
    // discarded after the last one
    if (outlen >= RATE) {
      P(&s, START_ROUND(PA_ROUNDS));
    }
  }
  return 0;
}
Christoph Dobraunig
Martin Schläffer
Ferdinand Bachmann
#include "endian.h"
#include "permutations.h"
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
/* Function form of to_big_immediate(): returns the two 32-bit halves that
 * the macro derives from `in`. */
u32_2 to_big(u64 in) {
  u32_2 halves;

  to_big_immediate(halves, in);
  return halves;
}
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
/* Function form of from_big_immediate(): rebuilds the 64-bit value from the
 * two 32-bit halves in `in`. */
u64 from_big(u32_2 in) {
  u64 word;

  from_big_immediate(word, in);
  return word;
}
/*
void P(state *p, u8 start_round) {
// implemented in asm
}
*/
#ifndef PERMUTATIONS_H_
#define PERMUTATIONS_H_
/*
 * Types and helpers for the permutation with every 64-bit state word held as
 * two 32-bit halves.  The conversion macros expand to U64BIG, so "endian.h"
 * must be included before they are used.
 */
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned long long u64;
/* One 64-bit word split into its high (h) and low (l) 32 bits. */
typedef struct {
  u32 h;
  u32 l;
} u32_2;
/* Permutation state: five 64-bit words. */
typedef struct {
  u32_2 x0;
  u32_2 x1;
  u32_2 x2;
  u32_2 x3;
  u32_2 x4;
} state;
/* Rotates a 32-bit value right by n bits; n must be in 1..31. */
#define ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
/* Index of the first round when x of the 12 rounds are to be run. */
#define START_ROUND(x) (12 - (x))
u32_2 to_big(u64 in);
u64 from_big(u32_2 in);
/*
 * (out).h / (out).l = upper / lower 32 bits of U64BIG(in).
 * `out` must be a u32_2 lvalue.  Arguments are parenthesised and the
 * temporary carries a macro-specific name, so expressions such as `*p` and
 * caller variables called hi/lo/big_in work as arguments.
 */
#define to_big_immediate(out, in)        \
  do {                                   \
    const u64 to_big_word_ = U64BIG(in); \
    (out).h = (u32)(to_big_word_ >> 32); \
    (out).l = (u32)(to_big_word_);       \
  } while (0)
/*
 * out = U64BIG(((u64)(in).h << 32) | (in).l).
 * `in` must be a u32_2 expression, `out` a u64 lvalue.
 */
#define from_big_immediate(out, in)                         \
  do {                                                      \
    const u32_2 from_big_src_ = (in);                       \
    const u64 from_big_word_ =                              \
        ((u64)from_big_src_.h << 32) | from_big_src_.l;     \
    (out) = U64BIG(from_big_word_);                         \
  } while (0)
/*
 * Permutes *p in place.  The assembly implementation uses the second
 * argument as the index of the first round to execute (callers pass
 * START_ROUND(n)), hence the parameter name.
 */
void P(state *p, u8 start_round);
#endif  // PERMUTATIONS_H_
#include <xtensa/coreasm.h>
/* Register aliases used by P below. */
/* Scratch register.  It aliases a2, which P first uses as the state pointer. */
#define tmp0 a2
/* Round index, taken from a3 and incremented once per round. */
#define rnd a3
/* State words x0..x4, each held as a high/low pair of 32-bit registers. */
#define x0h a4
#define x0l a5
#define x1h a6
#define x1l a7
#define x2h a8
#define x2l a9
#define x3h a10
#define x3l a11
#define x4h a12
#define x4l a13
/* 64-bit temporary of the linear layer (high/low halves). */
#define t0h a14
#define t0l a15
/* S-box temporaries; they share a14/a15 with t0h/t0l. */
#define tmp1 a14
#define tmp2 a15
.section .text
.align 4
.global P
.type P,@function
/*
 * P: applies the permutation rounds in place to the state addressed by a2.
 * a3 (rnd) is the index of the first round; rounds rnd..11 are executed.
 * The loop test sits at the bottom, so the body always runs at least once.
 * NOTE(review): a12-a15 are overwritten and never restored.  That fits the
 * windowed ABI; under CALL0 they would be callee-saved - confirm the ABI
 * this file is built for.
 */
P:
abi_entry 4
/* Load the five state words (high half first) from the state into registers. */
l32i x0h, a2, 0
l32i x0l, a2, 4
l32i x1h, a2, 8
l32i x1l, a2, 12
l32i x2h, a2, 16
l32i x2l, a2, 20
l32i x3h, a2, 24
l32i x3l, a2, 28
l32i x4h, a2, 32
l32i x4l, a2, 36
/* Save the state pointer at 0(a1): a2 is reused as tmp0 inside the loop. */
s32i a2, a1, 0
.Lround_start:
/* round constant */
/* tmp0 = ((15 - rnd) << 4) | rnd, XORed into the low half of x2 */
movi tmp0, 15
sub tmp0, tmp0, rnd
slli tmp0, tmp0, 4
or tmp0, tmp0, rnd
xor x2l, x2l, tmp0
/* s-box layer */
/* tmp0 = all ones, so "xor r, tmp0, v" computes ~v */
movi tmp0, -1
/* high */
xor x0h, x0h, x4h
xor x4h, x4h, x3h
xor x2h, x2h, x1h
/* tmp2 = ~x4h & x0h, kept until it is XORed into x3h below */
xor tmp1, tmp0, x4h
and tmp2, tmp1, x0h
/* x0h ^= ~x1h & x2h */
xor tmp1, tmp0, x1h
and tmp1, tmp1, x2h
xor x0h, x0h, tmp1
/* x2h ^= ~x3h & x4h */
xor tmp1, tmp0, x3h
and tmp1, tmp1, x4h
xor x2h, x2h, tmp1
/* x4h ^= ~x0h & x1h */
xor tmp1, tmp0, x0h
and tmp1, tmp1, x1h
xor x4h, x4h, tmp1
/* x1h ^= ~x2h & x3h */
xor tmp1, tmp0, x2h
and tmp1, tmp1, x3h
xor x1h, x1h, tmp1
xor x3h, x3h, tmp2
xor x1h, x1h, x0h
xor x3h, x3h, x2h
xor x0h, x0h, x4h
/* x2h = ~x2h */
xor x2h, x2h, tmp0
/* low */
/* same sequence as above, applied to the low halves */
xor x0l, x0l, x4l
xor x4l, x4l, x3l
xor x2l, x2l, x1l
xor tmp1, tmp0, x4l
and tmp2, tmp1, x0l
xor tmp1, tmp0, x1l
and tmp1, tmp1, x2l
xor x0l, x0l, tmp1
xor tmp1, tmp0, x3l
and tmp1, tmp1, x4l
xor x2l, x2l, tmp1
xor tmp1, tmp0, x0l
and tmp1, tmp1, x1l
xor x4l, x4l, tmp1
xor tmp1, tmp0, x2l
and tmp1, tmp1, x3l
xor x1l, x1l, tmp1
xor x3l, x3l, tmp2
xor x1l, x1l, x0l
xor x3l, x3l, x2l
xor x0l, x0l, x4l
xor x2l, x2l, tmp0
/* linear layer */
/*
 * Each word x becomes x ^ ror64(x, r1) ^ ror64(x, r2).  "src d, a, b" shifts
 * the 64-bit pair a:b right by the ssai amount and keeps the low 32 bits, so
 * two src instructions with swapped operands give both halves of a rotation.
 * Rotations of 32 or more swap the operand halves and shift by r - 32.
 */
/* x0: rotations 19 and 28 */
ssai 19
src t0l, x0h, x0l
src t0h, x0l, x0h
xor t0l, t0l, x0l
xor t0h, t0h, x0h
ssai 28
src tmp0, x0h, x0l
src x0h, x0l, x0h
xor x0l, tmp0, t0l
xor x0h, x0h, t0h
/* x1: rotations 61 and 39 */
ssai 29 /* inverted 61 */
src t0l, x1l, x1h
src t0h, x1h, x1l
xor t0l, t0l, x1l
xor t0h, t0h, x1h
ssai 7 /* inverted 39 */
src tmp0, x1l, x1h
src x1h, x1h, x1l
xor x1l, tmp0, t0l
xor x1h, x1h, t0h
/* x2: rotations 1 and 6 */
ssai 1
src t0l, x2h, x2l
src t0h, x2l, x2h
xor t0l, t0l, x2l
xor t0h, t0h, x2h
ssai 6
src tmp0, x2h, x2l
src x2h, x2l, x2h
xor x2l, tmp0, t0l
xor x2h, x2h, t0h
/* x3: rotations 10 and 17 */
ssai 10
src t0l, x3h, x3l
src t0h, x3l, x3h
xor t0l, t0l, x3l
xor t0h, t0h, x3h
ssai 17
src tmp0, x3h, x3l
src x3h, x3l, x3h
xor x3l, tmp0, t0l
xor x3h, x3h, t0h
/* x4: rotations 7 and 41 */
ssai 7
src t0l, x4h, x4l
src t0h, x4l, x4h
xor t0l, t0l, x4l
xor t0h, t0h, x4h
ssai 9 /* inverted 41 */
src tmp0, x4l, x4h
src x4h, x4h, x4l
xor x4l, tmp0, t0l
xor x4h, x4h, t0h
/* loop */
/* next round; stop once rnd reaches 12 */
addi rnd, rnd, 1
bltui rnd, 12, .Lround_start
.Lend:
/* Reload the saved state pointer and write the ten halves back. */
l32i a2, a1, 0
s32i x0h, a2, 0
s32i x0l, a2, 4
s32i x1h, a2, 8
s32i x1l, a2, 12
s32i x2h, a2, 16
s32i x2l, a2, 20
s32i x3h, a2, 24
s32i x3l, a2, 28
s32i x4h, a2, 32
s32i x4l, a2, 36
abi_return
#ifndef ENDIAN_H_
#define ENDIAN_H_
/*
 * Byte-order helpers.  UxxBIG(x) converts a value between host byte order
 * and big-endian; the conversion is its own inverse.  On big-endian hosts
 * the macros are the identity, on little-endian hosts they reverse the bytes.
 * _MSC_VER builds are treated as little-endian.
 *
 * The byte-swapping variants evaluate `x` several times, so the argument
 * must be free of side effects.
 */
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
// macros for big endian machines
#define U64BIG(x) (x)
#define U32BIG(x) (x)
#define U16BIG(x) (x)
#elif defined(_MSC_VER) || \
    (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
// macros for little endian machines
#define U64BIG(x) \
  ((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) | \
   (((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) | \
   (((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) | \
   (((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56))
// The masks are unsigned so the shifts are done in unsigned arithmetic: with
// plain int masks, ((x)&0xFF) << 24 overflows a signed int whenever bit 7 of
// an int argument is set.
#define U32BIG(x) \
  ((((x)&0x000000FFU) << 24) | (((x)&0x0000FF00U) << 8) | \
   (((x)&0x00FF0000U) >> 8) | (((x)&0xFF000000U) >> 24))
#define U16BIG(x) ((((x)&0x00FF) << 8) | (((x)&0xFF00) >> 8))
#else
#error "ascon byte order macros not defined in endian.h"
#endif
#endif  // ENDIAN_H_
#include <string.h>
#include "api.h"
#include "endian.h"
#include "permutations.h"
#define RATE (64 / 8)
#define PA_ROUNDS 12
/*
 * Hashes the `inlen` bytes at `in` and writes CRYPTO_BYTES bytes to `out`.
 * Always returns 0.
 *
 * Words are moved between the byte buffers and u64 variables with memcpy()
 * instead of u64 pointer casts: the caller's buffers carry no alignment
 * guarantee, and dereferencing a misaligned or type-punned u64 pointer is
 * undefined behaviour.
 */
int crypto_hash(unsigned char* out, const unsigned char* in,
                unsigned long long inlen) {
  state s;
  u32_2 t0;
  u64 tmp0;
  u64 outlen;
  u8 last[RATE];

  // initialization
  to_bit_interleaving_big_immediate(s.x0, 0x16d44c813b277eb5);
  to_bit_interleaving_big_immediate(s.x1, 0x2024ae622504512b);
  to_bit_interleaving_big_immediate(s.x2, 0x1822df8d76a7a366);
  to_bit_interleaving_big_immediate(s.x3, 0x0c6553817a0aad5a);
  to_bit_interleaving_big_immediate(s.x4, 0xb6939453320e3e4f);

  // absorb all full RATE-byte blocks
  while (inlen >= RATE) {
    memcpy(&tmp0, in, RATE);
    t0 = to_bit_interleaving_big(tmp0);
    s.x0.e ^= t0.e;
    s.x0.o ^= t0.o;
    P(&s, START_ROUND(PA_ROUNDS));
    inlen -= RATE;
    in += RATE;
  }

  // absorb the final block: the remaining bytes, one 0x80 byte, zero fill
  memset(last, 0, sizeof last);
  if (inlen > 0) {
    memcpy(last, in, inlen);
  }
  last[inlen] = 0x80;
  memcpy(&tmp0, last, RATE);
  t0 = to_bit_interleaving_big(tmp0);
  s.x0.e ^= t0.e;
  s.x0.o ^= t0.o;
  P(&s, START_ROUND(PA_ROUNDS));

  // squeeze hash
  outlen = CRYPTO_BYTES;
  while (outlen >= RATE) {
    tmp0 = from_bit_interleaving_big(s.x0);
    memcpy(out, &tmp0, RATE);
    outlen -= RATE;
    out += RATE;
    // permute only when another block will be extracted; the state is
    // discarded after the last one
    if (outlen >= RATE) {
      P(&s, START_ROUND(PA_ROUNDS));
    }
  }
  return 0;
}
Christoph Dobraunig
Martin Schläffer
Ferdinand Bachmann
#include "endian.h"
#include "permutations.h"
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
/* Function form of to_bit_interleaving_big_immediate(): returns the
 * interleaved representation the macro computes from `in`. */
u32_2 to_bit_interleaving_big(u64 in) {
  u32_2 interleaved;

  to_bit_interleaving_big_immediate(interleaved, in);
  return interleaved;
}
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
/* Function form of from_bit_interleaving_big_immediate(): returns the
 * 64-bit value the macro reconstructs from the interleaved pair `in`. */
u64 from_bit_interleaving_big(u32_2 in) {
  u64 word;

  from_bit_interleaving_big_immediate(word, in);
  return word;
}
/*
void P(state *p, u8 start_round) {
// implemented in asm
}
*/
#ifndef PERMUTATIONS_H_
#define PERMUTATIONS_H_
/*
 * Types and helper macros for the bit-interleaved 32-bit permutation.
 * The conversion macros expand to U64BIG, so "endian.h" must be included
 * before they are used.
 */
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned long long u64;
/* One 64-bit word in bit-interleaved form: `e` collects the even-numbered
 * bits, `o` the odd-numbered bits. */
typedef struct {
  u32 e;
  u32 o;
} u32_2;
/* Permutation state: five interleaved 64-bit words. */
typedef struct {
  u32_2 x0;
  u32_2 x1;
  u32_2 x2;
  u32_2 x3;
  u32_2 x4;
} state;
/* Rotates a 32-bit value right by n bits; n must be in 1..31. */
#define ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
/* Index of the first round when x of the 12 rounds are to be run. */
#define START_ROUND(x) (12 - (x))
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
u32_2 to_bit_interleaving_big(u64 in);
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
u64 from_bit_interleaving_big(u32_2 in);
/*
 * Stores the bit-interleaved form of U64BIG(in) in `out` (a u32_2 lvalue).
 * Each 32-bit half is separated into even and odd bits by four swap steps;
 * the even bits of both halves then form out.e and the odd bits out.o.
 * Arguments are parenthesised and the temporaries carry macro-specific
 * names, so `*p` or caller variables named hi/lo/r0/r1 work as arguments.
 */
#define to_bit_interleaving_big_immediate(out, in) \
  do { \
    const u64 bi_w_ = U64BIG(in); \
    u32 bi_hi_ = (u32)(bi_w_ >> 32); \
    u32 bi_lo_ = (u32)(bi_w_); \
    u32 bi_t_; \
    bi_t_ = (bi_lo_ ^ (bi_lo_ >> 1)) & 0x22222222, bi_lo_ ^= bi_t_ ^ (bi_t_ << 1); \
    bi_t_ = (bi_lo_ ^ (bi_lo_ >> 2)) & 0x0C0C0C0C, bi_lo_ ^= bi_t_ ^ (bi_t_ << 2); \
    bi_t_ = (bi_lo_ ^ (bi_lo_ >> 4)) & 0x00F000F0, bi_lo_ ^= bi_t_ ^ (bi_t_ << 4); \
    bi_t_ = (bi_lo_ ^ (bi_lo_ >> 8)) & 0x0000FF00, bi_lo_ ^= bi_t_ ^ (bi_t_ << 8); \
    bi_t_ = (bi_hi_ ^ (bi_hi_ >> 1)) & 0x22222222, bi_hi_ ^= bi_t_ ^ (bi_t_ << 1); \
    bi_t_ = (bi_hi_ ^ (bi_hi_ >> 2)) & 0x0C0C0C0C, bi_hi_ ^= bi_t_ ^ (bi_t_ << 2); \
    bi_t_ = (bi_hi_ ^ (bi_hi_ >> 4)) & 0x00F000F0, bi_hi_ ^= bi_t_ ^ (bi_t_ << 4); \
    bi_t_ = (bi_hi_ ^ (bi_hi_ >> 8)) & 0x0000FF00, bi_hi_ ^= bi_t_ ^ (bi_t_ << 8); \
    (out).e = (bi_lo_ & 0x0000FFFF) | (bi_hi_ << 16); \
    (out).o = (bi_lo_ >> 16) | (bi_hi_ & 0xFFFF0000); \
  } while (0)
/*
 * Inverse of the macro above: runs the same swap steps in reverse order and
 * assigns U64BIG of the reassembled word to `out` (a u64 lvalue).  `in` must
 * be a u32_2 expression.
 */
#define from_bit_interleaving_big_immediate(out, in) \
  do { \
    const u32_2 bi_src_ = (in); \
    u32 bi_lo_ = (bi_src_.e & 0x0000FFFF) | (bi_src_.o << 16); \
    u32 bi_hi_ = (bi_src_.e >> 16) | (bi_src_.o & 0xFFFF0000); \
    u32 bi_t_; \
    u64 bi_w_; \
    bi_t_ = (bi_lo_ ^ (bi_lo_ >> 8)) & 0x0000FF00, bi_lo_ ^= bi_t_ ^ (bi_t_ << 8); \
    bi_t_ = (bi_lo_ ^ (bi_lo_ >> 4)) & 0x00F000F0, bi_lo_ ^= bi_t_ ^ (bi_t_ << 4); \
    bi_t_ = (bi_lo_ ^ (bi_lo_ >> 2)) & 0x0C0C0C0C, bi_lo_ ^= bi_t_ ^ (bi_t_ << 2); \
    bi_t_ = (bi_lo_ ^ (bi_lo_ >> 1)) & 0x22222222, bi_lo_ ^= bi_t_ ^ (bi_t_ << 1); \
    bi_t_ = (bi_hi_ ^ (bi_hi_ >> 8)) & 0x0000FF00, bi_hi_ ^= bi_t_ ^ (bi_t_ << 8); \
    bi_t_ = (bi_hi_ ^ (bi_hi_ >> 4)) & 0x00F000F0, bi_hi_ ^= bi_t_ ^ (bi_t_ << 4); \
    bi_t_ = (bi_hi_ ^ (bi_hi_ >> 2)) & 0x0C0C0C0C, bi_hi_ ^= bi_t_ ^ (bi_t_ << 2); \
    bi_t_ = (bi_hi_ ^ (bi_hi_ >> 1)) & 0x22222222, bi_hi_ ^= bi_t_ ^ (bi_t_ << 1); \
    bi_w_ = ((u64)bi_hi_ << 32) | bi_lo_; \
    (out) = U64BIG(bi_w_); \
  } while (0)
/*
 * One permutation round with the even/odd round-constant halves C_e, C_o.
 * The macro works on variables named `s` (state) and `t0` (u32_2) that must
 * exist in the invoking scope.
 */
/* clang-format off */
#define ROUND(C_e, C_o) \
  do { \
    /* round constant */ \
    s.x2.e ^= C_e; s.x2.o ^= C_o; \
    /* s-box layer */ \
    s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \
    s.x4.e ^= s.x3.e; s.x4.o ^= s.x3.o; \
    s.x2.e ^= s.x1.e; s.x2.o ^= s.x1.o; \
    t0.e = s.x0.e & (~s.x4.e); t0.o = s.x0.o & (~s.x4.o); \
    s.x0.e ^= s.x2.e & (~s.x1.e); s.x0.o ^= s.x2.o & (~s.x1.o); \
    s.x2.e ^= s.x4.e & (~s.x3.e); s.x2.o ^= s.x4.o & (~s.x3.o); \
    s.x4.e ^= s.x1.e & (~s.x0.e); s.x4.o ^= s.x1.o & (~s.x0.o); \
    s.x1.e ^= s.x3.e & (~s.x2.e); s.x1.o ^= s.x3.o & (~s.x2.o); \
    s.x3.e ^= t0.e; s.x3.o ^= t0.o; \
    s.x1.e ^= s.x0.e; s.x1.o ^= s.x0.o; \
    s.x3.e ^= s.x2.e; s.x3.o ^= s.x2.o; \
    s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \
    /* linear layer */ \
    t0.e = s.x0.e ^ ROTR32(s.x0.o, 4); \
    t0.o = s.x0.o ^ ROTR32(s.x0.e, 5); \
    s.x0.e ^= ROTR32(t0.o, 9); \
    s.x0.o ^= ROTR32(t0.e, 10); \
    t0.e = s.x1.e ^ ROTR32(s.x1.e, 11); \
    t0.o = s.x1.o ^ ROTR32(s.x1.o, 11); \
    s.x1.e ^= ROTR32(t0.o, 19); \
    s.x1.o ^= ROTR32(t0.e, 20); \
    t0.e = s.x2.e ^ ROTR32(s.x2.o, 2); \
    t0.o = s.x2.o ^ ROTR32(s.x2.e, 3); \
    s.x2.e ^= t0.o; \
    s.x2.o ^= ROTR32(t0.e, 1); \
    t0.e = s.x3.e ^ ROTR32(s.x3.o, 3); \
    t0.o = s.x3.o ^ ROTR32(s.x3.e, 4); \
    s.x3.e ^= ROTR32(t0.e, 5); \
    s.x3.o ^= ROTR32(t0.o, 5); \
    t0.e = s.x4.e ^ ROTR32(s.x4.e, 17); \
    t0.o = s.x4.o ^ ROTR32(s.x4.o, 17); \
    s.x4.e ^= ROTR32(t0.o, 3); \
    s.x4.o ^= ROTR32(t0.e, 4); \
    s.x2.e = ~s.x2.e; s.x2.o = ~s.x2.o; \
  } while(0)
/* clang-format on */
/*
 * Permutes *p in place.  The assembly implementation uses the second
 * argument as the index of the first round to execute (callers pass
 * START_ROUND(n)), hence the parameter name.
 */
void P(state *p, u8 start_round);
#endif  // PERMUTATIONS_H_
#include <xtensa/coreasm.h>
/*
 * Round-constant table read by P: one pair of bytes per round, 12 rounds.
 * P XORs the first byte of a pair into x2e and the second into x2o, and
 * locates the starting pair at byte offset 2 * a3.
 */
.section .data
.align 2
.global ascon_round_constants
.type ascon_round_constants,@object
ascon_round_constants:
.byte 0xc, 0xc
.byte 0x9, 0xc
.byte 0xc, 0x9
.byte 0x9, 0x9
.byte 0x6, 0xc
.byte 0x3, 0xc
.byte 0x6, 0x9
.byte 0x3, 0x9
.byte 0xc, 0x6
.byte 0x9, 0x6
.byte 0xc, 0x3
.byte 0x9, 0x3
/* Register aliases used by P below. */
/* State words x0..x4, each held as an even/odd pair of 32-bit registers. */
#define x0e a4
#define x0o a5
#define x1e a6
#define x1o a7
#define x2e a8
#define x2o a9
#define x3e a10
#define x3o a11
#define x4e a12
#define x4o a13
/* Temporary pair used for the round constant and the linear layer. */
#define t0e a14
#define t0o a15
/* S-box temporaries; they share a14/a15 with t0e/t0o. */
#define tmp a14
#define neg a15
/*
 * P -- Ascon permutation on a bit-interleaved state.
 * a2 is used as the address of the ten 32-bit state words (x0e, x0o, ...,
 * x4e, x4o at byte offsets 0..36) and a3 as the index of the first round.
 * Rounds run until a3 reaches 12; the state lives in a4..a13 during the loop
 * and is written back to the same address at .Lend.
 */
.section .text
.align 4
.global P
.type P,@function
P:
/* abi_entry/abi_return come from <xtensa/coreasm.h>; presumably the argument
   reserves the 4 bytes at a1+0 used below -- confirm for the ABI in use */
abi_entry 4
/* load the state into registers */
l32i x0e, a2, 0
l32i x0o, a2, 4
l32i x1e, a2, 8
l32i x1o, a2, 12
l32i x2e, a2, 16
l32i x2o, a2, 20
l32i x3e, a2, 24
l32i x3o, a2, 28
l32i x4e, a2, 32
l32i x4o, a2, 36
/* a2 is reused as the constant pointer, so park the state address at a1+0 */
s32i a2, a1, 0
/* a2 = ascon_round_constants + 2 * a3 (two bytes per round) */
movi a2, ascon_round_constants
addx2 a2, a3, a2
.Lround_start:
/* round constant */
/* a14/a15 are t0e/t0o: byte 0 goes into x2e, byte 1 into x2o */
l8ui t0e, a2, 0
xor x2e, x2e, a14
l8ui t0o, a2, 1
xor x2o, x2o, a15
/* s-box layer */
/* even */
xor x0e, x0e, x4e
xor x4e, x4e, x3e
xor x2e, x2e, x1e
/* each "movi neg, -1 / xor neg, neg, r" pair computes ~r */
/* tmp = x0e & ~x4e, kept until it is XORed into x3e below */
movi neg, -1
xor neg, neg, x4e
and tmp, neg, x0e
/* x0e ^= x2e & ~x1e */
movi neg, -1
xor neg, neg, x1e
and neg, neg, x2e
xor x0e, x0e, neg
/* x2e ^= x4e & ~x3e */
movi neg, -1
xor neg, neg, x3e
and neg, neg, x4e
xor x2e, x2e, neg
/* x4e ^= x1e & ~x0e */
movi neg, -1
xor neg, neg, x0e
and neg, neg, x1e
xor x4e, x4e, neg
/* x1e ^= x3e & ~x2e */
movi neg, -1
xor neg, neg, x2e
and neg, neg, x3e
xor x1e, x1e, neg
xor x3e, x3e, tmp
xor x1e, x1e, x0e
xor x3e, x3e, x2e
xor x0e, x0e, x4e
/* odd */
/* same sequence as above, applied to the odd words */
xor x0o, x0o, x4o
xor x4o, x4o, x3o
xor x2o, x2o, x1o
movi neg, -1
xor neg, neg, x4o
and tmp, neg, x0o
movi neg, -1
xor neg, neg, x1o
and neg, neg, x2o
xor x0o, x0o, neg
movi neg, -1
xor neg, neg, x3o
and neg, neg, x4o
xor x2o, x2o, neg
movi neg, -1
xor neg, neg, x0o
and neg, neg, x1o
xor x4o, x4o, neg
movi neg, -1
xor neg, neg, x2o
and neg, neg, x3o
xor x1o, x1o, neg
xor x3o, x3o, tmp
xor x1o, x1o, x0o
xor x3o, x3o, x2o
xor x0o, x0o, x4o
/* linear layer */
/* ssai sets the shift amount; src with the same register as both sources
   rotates that register right by the amount */
/* x0: t0e = x0e ^ ror(x0o, 4), t0o = x0o ^ ror(x0e, 5) */
ssai 4
src t0e, x0o, x0o
xor t0e, t0e, x0e
ssai 5
src t0o, x0e, x0e
xor t0o, t0o, x0o
/* x0e ^= ror(t0o, 9), x0o ^= ror(t0e, 10) */
ssai 9
src t0o, t0o, t0o
xor x0e, x0e, t0o
ssai 10
src t0e, t0e, t0e
xor x0o, x0o, t0e
/* x1: t0e = x1e ^ ror(x1e, 11), t0o = x1o ^ ror(x1o, 11) */
ssai 11
src t0e, x1e, x1e
xor t0e, t0e, x1e
src t0o, x1o, x1o
xor t0o, t0o, x1o
/* x1e ^= ror(t0o, 19), x1o ^= ror(t0e, 20) */
ssai 19
src t0o, t0o, t0o
xor x1e, x1e, t0o
ssai 20
src t0e, t0e, t0e
xor x1o, x1o, t0e
/* x2: t0e = x2e ^ ror(x2o, 2), t0o = x2o ^ ror(x2e, 3) */
ssai 2
src t0e, x2o, x2o
xor t0e, t0e, x2e
ssai 3
src t0o, x2e, x2e
xor t0o, t0o, x2o
/* x2e ^= t0o (no rotation), x2o ^= ror(t0e, 1) */
xor x2e, x2e, t0o
ssai 1
src t0e, t0e, t0e
xor x2o, x2o, t0e
/* x3: t0e = x3e ^ ror(x3o, 3), t0o = x3o ^ ror(x3e, 4) */
ssai 3
src t0e, x3o, x3o
xor t0e, t0e, x3e
ssai 4
src t0o, x3e, x3e
xor t0o, t0o, x3o
/* x3e ^= ror(t0e, 5), x3o ^= ror(t0o, 5) */
ssai 5
src t0e, t0e, t0e
xor x3e, x3e, t0e
src t0o, t0o, t0o
xor x3o, x3o, t0o
/* x4: t0e = x4e ^ ror(x4e, 17), t0o = x4o ^ ror(x4o, 17) */
ssai 17
src t0e, x4e, x4e
xor t0e, t0e, x4e
src t0o, x4o, x4o
xor t0o, t0o, x4o
/* x4e ^= ror(t0o, 3), x4o ^= ror(t0e, 4) */
ssai 3
src t0o, t0o, t0o
xor x4e, x4e, t0o
ssai 4
src t0e, t0e, t0e
xor x4o, x4o, t0e
/* x2 = ~x2 */
movi neg, -1
xor x2e, x2e, neg
xor x2o, x2o, neg
/* loop */
/* advance the constant pointer and the round index together */
/* NOTE(review): the test is at the bottom, so one round always executes,
   even when a3 is already 12 or more on entry */
addi a2, a2, 2
addi a3, a3, 1
bltui a3, 12, .Lround_start
.Lend:
/* reload the state address saved at entry and store the state back */
l32i a2, a1, 0
s32i x0e, a2, 0
s32i x0o, a2, 4
s32i x1e, a2, 8
s32i x1o, a2, 12
s32i x2e, a2, 16
s32i x2o, a2, 20
s32i x3e, a2, 24
s32i x3o, a2, 28
s32i x4e, a2, 32
s32i x4o, a2, 36
abi_return
#ifndef ENDIAN_H_
#define ENDIAN_H_

// UxxBIG(x) converts an xx-bit value between host byte order and big-endian
// byte order; applying it twice gives back the input. The little-endian
// variants mention x once per byte, so x should have no side effects.

#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__

// big-endian host: values already have the wanted byte order
#define U16BIG(x) (x)
#define U32BIG(x) (x)
#define U64BIG(x) (x)

#elif defined(_MSC_VER) || \
    (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)

// little-endian host: reverse the byte sequence
#define U16BIG(x) ((((x)&0xFF00) >> 8) | (((x)&0x00FF) << 8))

#define U32BIG(x)                                       \
  ((((x)&0xFF000000) >> 24) | (((x)&0x00FF0000) >> 8) | \
   (((x)&0x0000FF00) << 8) | (((x)&0x000000FF) << 24))

#define U64BIG(x)                         \
  ((((x)&0xFF00000000000000ULL) >> 56) |  \
   (((x)&0x00FF000000000000ULL) >> 40) |  \
   (((x)&0x0000FF0000000000ULL) >> 24) |  \
   (((x)&0x000000FF00000000ULL) >> 8) |   \
   (((x)&0x00000000FF000000ULL) << 8) |   \
   (((x)&0x0000000000FF0000ULL) << 24) |  \
   (((x)&0x000000000000FF00ULL) << 40) |  \
   (((x)&0x00000000000000FFULL) << 56))

#else
#error "ascon byte order macros not defined in endian.h"
#endif

#endif  // ENDIAN_H_
#include <string.h>
#include "api.h"
#include "endian.h"
#include "permutations.h"
// Bytes absorbed into, or squeezed out of, the state per permutation call.
#define RATE (64 / 8)
// Round count passed to every P call in this file.
#define PA_ROUNDS 12
// Computes the Ascon hash of `in[0..inlen)` and writes CRYPTO_BYTES bytes to
// `out`. Always returns 0.
//
// Message and digest bytes are moved with memcpy instead of dereferencing
// casted u64 pointers: `in` and `out` are arbitrary byte pointers, so a direct
// 64-bit access may be misaligned and violates the aliasing rules (and the
// cast silently dropped `const`).
int crypto_hash(unsigned char* out, const unsigned char* in,
                unsigned long long inlen) {
  state s;
  u64 outlen;

  // initialization: precomputed initial state
  to_bit_interleaving_big_immediate(s.x0, 0x16d44c813b277eb5);
  to_bit_interleaving_big_immediate(s.x1, 0x2024ae622504512b);
  to_bit_interleaving_big_immediate(s.x2, 0x1822df8d76a7a366);
  to_bit_interleaving_big_immediate(s.x3, 0x0c6553817a0aad5a);
  to_bit_interleaving_big_immediate(s.x4, 0xb6939453320e3e4f);

  u32_2 t0;
  u64 tmp0;

  // absorb all full RATE-byte blocks
  while (inlen >= RATE) {
    memcpy(&tmp0, in, sizeof tmp0);
    t0 = to_bit_interleaving_big(tmp0);
    s.x0.e ^= t0.e;
    s.x0.o ^= t0.o;
    P(&s, PA_ROUNDS);
    inlen -= RATE;
    in += RATE;
  }

  // absorb the final partial block: remaining bytes, then 0x80, then zeros
  u8 bytes[sizeof tmp0] = {0};
  if (inlen > 0) {
    // skip the call for an empty tail so a NULL `in` with inlen == 0 is fine
    memcpy(bytes, in, inlen);
  }
  bytes[inlen] = 0x80;
  memcpy(&tmp0, bytes, sizeof tmp0);
  t0 = to_bit_interleaving_big(tmp0);
  s.x0.e ^= t0.e;
  s.x0.o ^= t0.o;
  P(&s, PA_ROUNDS);

  // squeeze hash
  outlen = CRYPTO_BYTES;
  while (outlen >= RATE) {
    tmp0 = from_bit_interleaving_big(s.x0);
    memcpy(out, &tmp0, sizeof tmp0);
    outlen -= RATE;
    out += RATE;
    // the state is discarded after the last block, so only permute when
    // another block will be squeezed
    if (outlen >= RATE) {
      P(&s, PA_ROUNDS);
    }
  }
  return 0;
}
Christoph Dobraunig
Martin Schläffer
Ferdinand Bachmann
#include "endian.h"
#include "permutations.h"
// Round constants in bit-interleaved form, one row per round of the 12-round
// permutation: column 0 is passed to ROUND as C_e, column 1 as C_o.
static const u8 constants[12][2] = {
    {0xc, 0xc},  // round 0
    {0x9, 0xc},  // round 1
    {0xc, 0x9},  // round 2
    {0x9, 0x9},  // round 3
    {0x6, 0xc},  // round 4
    {0x3, 0xc},  // round 5
    {0x6, 0x9},  // round 6
    {0x3, 0x9},  // round 7
    {0xc, 0x6},  // round 8
    {0x9, 0x6},  // round 9
    {0xc, 0x3},  // round 10
    {0x9, 0x3},  // round 11
};
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
// Function form of to_bit_interleaving_big_immediate: returns the
// bit-interleaved (e, o) word pair for the 64-bit input.
u32_2 to_bit_interleaving_big(u64 in) {
  u32_2 interleaved;
  to_bit_interleaving_big_immediate(interleaved, in);
  return interleaved;
}
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
// Function form of from_bit_interleaving_big_immediate: turns a
// bit-interleaved (e, o) word pair back into a 64-bit value.
u64 from_bit_interleaving_big(u32_2 in) {
  u64 value;
  from_bit_interleaving_big_immediate(value, in);
  return value;
}
// Applies the last `rounds` rounds of the 12-round permutation to *p.
// ROUND works on locals named `s` and `t0`, so the state is copied in,
// transformed, and copied back.
void P(state *p, u8 rounds) {
  u32_2 t0;
  state s = *p;
  u32 i = START_ROUND(rounds);

  while (i < 12) {
    ROUND(constants[i][0], constants[i][1]);
    i++;
  }

  *p = s;
}
#ifndef PERMUTATIONS_H_
#define PERMUTATIONS_H_
// Integer shorthands used throughout the implementation.
// NOTE(review): u32 / u64 assume `unsigned int` is 32 bits and
// `unsigned long long` is 64 bits on the target — confirm.
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned long long u64;
// One 64-bit Ascon word in bit-interleaved form. As filled by
// to_bit_interleaving_big_immediate, `e` collects the bits at even positions
// and `o` the bits at odd positions.
typedef struct {
u32 e;
u32 o;
} u32_2;
// The permutation state: five bit-interleaved words x0..x4.
typedef struct {
u32_2 x0;
u32_2 x1;
u32_2 x2;
u32_2 x3;
u32_2 x4;
} state;
// Rotates the unsigned 32-bit value x right by n bits. Both shift counts are
// reduced modulo 32 so that n == 0 no longer shifts by the full width (which
// is undefined behavior); results for n in 1..31 are unchanged.
// x and n are each evaluated twice.
#define ROTR32(x, n) (((x) >> ((n) & 31)) | ((x) << ((32 - (n)) & 31)))
// Index of the first round to execute when running the last x of 12 rounds.
#define START_ROUND(x) (12 - (x))
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
// Function form of to_bit_interleaving_big_immediate: returns the (e, o)
// word pair for `in`.
u32_2 to_bit_interleaving_big(u64 in);
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
// Function form of from_bit_interleaving_big_immediate: rebuilds the 64-bit
// value from the (e, o) word pair `in`.
u64 from_bit_interleaving_big(u32_2 in);
// Converts the u64 expression `in` to bit-interleaved form and stores it in
// `out` (a u32_2 lvalue).
// U64BIG(in) is split into 32-bit halves hi and lo. Each half then goes
// through four swap steps (masks 0x22222222, 0x0C0C0C0C, 0x00F000F0,
// 0x0000FF00) that gather the bits at even positions (bit 0 = LSB) into its
// low 16 bits and the bits at odd positions into its high 16 bits. Finally
// out.e is built from the two low halves and out.o from the two high halves.
// Declares locals big_in, hi, lo, r0 and r1, so the arguments must not use
// those names; `out` is used as `out.e` / `out.o` without extra parentheses.
#define to_bit_interleaving_big_immediate(out, in) \
do { \
u64 big_in = U64BIG(in); \
u32 hi = (big_in) >> 32; \
u32 lo = (u32)(big_in); \
u32 r0, r1; \
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); \
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); \
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); \
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); \
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); \
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); \
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); \
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); \
out.e = (lo & 0x0000FFFF) | (hi << 16); \
out.o = (lo >> 16) | (hi & 0xFFFF0000); \
} while (0)
// Inverse of to_bit_interleaving_big_immediate: rebuilds a u64 from the
// u32_2 expression `in` and stores it in `out`.
// lo and hi are first reassembled from the 16-bit halves of in.e and in.o.
// The four swap steps are then applied in the opposite order (masks
// 0x0000FF00 down to 0x22222222), and the joined 64-bit value is passed
// through U64BIG.
// Declares locals lo, hi, r0 and r1, so the arguments must not use those
// names; `in` is used as `in.e` / `in.o` without extra parentheses.
#define from_bit_interleaving_big_immediate(out, in) \
do { \
u32 lo = (in.e & 0x0000FFFF) | (in.o << 16); \
u32 hi = (in.e >> 16) | (in.o & 0xFFFF0000); \
u32 r0, r1; \
r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); \
r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); \
r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); \
r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); \
r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); \
r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); \
r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); \
r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); \
out = (u64)hi << 32 | lo; \
out = U64BIG(out); \
} while (0)
/* clang-format off */
// One round of the Ascon permutation on a bit-interleaved state.
// Expands in a scope that provides `s` (a state) and `t0` (a u32_2 scratch
// pair); C_e / C_o are XORed into s.x2.e / s.x2.o as the round constant.
// The statements read values written by earlier statements, so their order
// must be kept.
#define ROUND(C_e, C_o) \
do { \
/* round constant */ \
s.x2.e ^= C_e; s.x2.o ^= C_o; \
/* s-box layer */ \
s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \
s.x4.e ^= s.x3.e; s.x4.o ^= s.x3.o; \
s.x2.e ^= s.x1.e; s.x2.o ^= s.x1.o; \
t0.e = s.x0.e & (~s.x4.e); t0.o = s.x0.o & (~s.x4.o); \
s.x0.e ^= s.x2.e & (~s.x1.e); s.x0.o ^= s.x2.o & (~s.x1.o); \
s.x2.e ^= s.x4.e & (~s.x3.e); s.x2.o ^= s.x4.o & (~s.x3.o); \
s.x4.e ^= s.x1.e & (~s.x0.e); s.x4.o ^= s.x1.o & (~s.x0.o); \
s.x1.e ^= s.x3.e & (~s.x2.e); s.x1.o ^= s.x3.o & (~s.x2.o); \
s.x3.e ^= t0.e; s.x3.o ^= t0.o; \
s.x1.e ^= s.x0.e; s.x1.o ^= s.x0.o; \
s.x3.e ^= s.x2.e; s.x3.o ^= s.x2.o; \
s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \
/* linear layer */ \
t0.e = s.x0.e ^ ROTR32(s.x0.o, 4); \
t0.o = s.x0.o ^ ROTR32(s.x0.e, 5); \
s.x0.e ^= ROTR32(t0.o, 9); \
s.x0.o ^= ROTR32(t0.e, 10); \
t0.e = s.x1.e ^ ROTR32(s.x1.e, 11); \
t0.o = s.x1.o ^ ROTR32(s.x1.o, 11); \
s.x1.e ^= ROTR32(t0.o, 19); \
s.x1.o ^= ROTR32(t0.e, 20); \
t0.e = s.x2.e ^ ROTR32(s.x2.o, 2); \
t0.o = s.x2.o ^ ROTR32(s.x2.e, 3); \
s.x2.e ^= t0.o; \
s.x2.o ^= ROTR32(t0.e, 1); \
t0.e = s.x3.e ^ ROTR32(s.x3.o, 3); \
t0.o = s.x3.o ^ ROTR32(s.x3.e, 4); \
s.x3.e ^= ROTR32(t0.e, 5); \
s.x3.o ^= ROTR32(t0.o, 5); \
t0.e = s.x4.e ^ ROTR32(s.x4.e, 17); \
t0.o = s.x4.o ^ ROTR32(s.x4.o, 17); \
s.x4.e ^= ROTR32(t0.o, 3); \
s.x4.o ^= ROTR32(t0.e, 4); \
s.x2.e = ~s.x2.e; s.x2.o = ~s.x2.o; \
} while(0)
/* clang-format on */
// Applies the last `rounds` rounds of the 12-round permutation to *p in place
// (the C definition starts at round START_ROUND(rounds)).
void P(state *p, u8 rounds);
#endif // PERMUTATIONS_H_
#ifndef ENDIAN_H_
#define ENDIAN_H_

// UxxBIG(x) converts an xx-bit value between host byte order and big-endian
// byte order; applying it twice gives back the input. The little-endian
// variants mention x once per byte, so x should have no side effects.

#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__

// big-endian host: values already have the wanted byte order
#define U16BIG(x) (x)
#define U32BIG(x) (x)
#define U64BIG(x) (x)

#elif defined(_MSC_VER) || \
    (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)

// little-endian host: reverse the byte sequence
#define U16BIG(x) ((((x)&0xFF00) >> 8) | (((x)&0x00FF) << 8))

#define U32BIG(x)                                       \
  ((((x)&0xFF000000) >> 24) | (((x)&0x00FF0000) >> 8) | \
   (((x)&0x0000FF00) << 8) | (((x)&0x000000FF) << 24))

#define U64BIG(x)                         \
  ((((x)&0xFF00000000000000ULL) >> 56) |  \
   (((x)&0x00FF000000000000ULL) >> 40) |  \
   (((x)&0x0000FF0000000000ULL) >> 24) |  \
   (((x)&0x000000FF00000000ULL) >> 8) |   \
   (((x)&0x00000000FF000000ULL) << 8) |   \
   (((x)&0x0000000000FF0000ULL) << 24) |  \
   (((x)&0x000000000000FF00ULL) << 40) |  \
   (((x)&0x00000000000000FFULL) << 56))

#else
#error "ascon byte order macros not defined in endian.h"
#endif

#endif  // ENDIAN_H_
#include <string.h>
#include "api.h"
#include "endian.h"
#include "permutations.h"
// Bytes absorbed into, or squeezed out of, the state per permutation call.
#define RATE (64 / 8)
// Round count used (via START_ROUND) for every P call in this file.
#define PA_ROUNDS 12
// Computes the Ascon hash of `in[0..inlen)` and writes CRYPTO_BYTES bytes to
// `out`. Always returns 0.
//
// Message and digest bytes are moved with memcpy instead of dereferencing
// casted u64 pointers: `in` and `out` are arbitrary byte pointers, so a direct
// 64-bit access may be misaligned and violates the aliasing rules (and the
// cast silently dropped `const`).
int crypto_hash(unsigned char* out, const unsigned char* in,
                unsigned long long inlen) {
  state s;
  u64 outlen;

  // initialization: precomputed initial state as (high, low) word pairs
  s.x0.h = 0xb57e273b; s.x0.l = 0x814cd416;
  s.x1.h = 0x2b510425; s.x1.l = 0x62ae2420;
  s.x2.h = 0x66a3a776; s.x2.l = 0x8ddf2218;
  s.x3.h = 0x5aad0a7a; s.x3.l = 0x8153650c;
  s.x4.h = 0x4f3e0e32; s.x4.l = 0x539493b6;

  u32_2 t0;
  u64 tmp0;

  // absorb all full RATE-byte blocks
  while (inlen >= RATE) {
    memcpy(&tmp0, in, sizeof tmp0);
    t0 = to_big(tmp0);
    s.x0.h ^= t0.h;
    s.x0.l ^= t0.l;
    P(&s, START_ROUND(PA_ROUNDS));
    inlen -= RATE;
    in += RATE;
  }

  // absorb the final partial block: remaining bytes, then 0x80, then zeros
  u8 bytes[sizeof tmp0] = {0};
  if (inlen > 0) {
    // skip the call for an empty tail so a NULL `in` with inlen == 0 is fine
    memcpy(bytes, in, inlen);
  }
  bytes[inlen] = 0x80;
  memcpy(&tmp0, bytes, sizeof tmp0);
  t0 = to_big(tmp0);
  s.x0.h ^= t0.h;
  s.x0.l ^= t0.l;
  P(&s, START_ROUND(PA_ROUNDS));

  // squeeze hash
  outlen = CRYPTO_BYTES;
  while (outlen >= RATE) {
    tmp0 = from_big(s.x0);
    memcpy(out, &tmp0, sizeof tmp0);
    outlen -= RATE;
    out += RATE;
    // the state is discarded after the last block, so only permute when
    // another block will be squeezed
    if (outlen >= RATE) {
      P(&s, START_ROUND(PA_ROUNDS));
    }
  }
  return 0;
}
Christoph Dobraunig
Martin Schläffer
Ferdinand Bachmann
#include "endian.h"
#include "permutations.h"
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
// Function form of to_big_immediate: returns the (h, l) word pair for `in`.
u32_2 to_big(u64 in) {
  u32_2 words;
  to_big_immediate(words, in);
  return words;
}
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
// Function form of from_big_immediate: rebuilds the 64-bit value from the
// (h, l) word pair `in`.
u64 from_big(u32_2 in) {
  u64 value;
  from_big_immediate(value, in);
  return value;
}
/*
 * void P(state *p, u8 start_round) is implemented in assembly for this
 * target, so this file deliberately contains no C definition of it.
 */
#ifndef PERMUTATIONS_H_
#define PERMUTATIONS_H_
// Integer shorthands used throughout the implementation.
// NOTE(review): u32 / u64 assume `unsigned int` is 32 bits and
// `unsigned long long` is 64 bits on the target — confirm.
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned long long u64;
// One 64-bit Ascon word split into two 32-bit words: as filled by
// to_big_immediate, `h` is the upper and `l` the lower half.
typedef struct {
u32 h;
u32 l;
} u32_2;
// The permutation state: five 64-bit words x0..x4.
typedef struct {
u32_2 x0;
u32_2 x1;
u32_2 x2;
u32_2 x3;
u32_2 x4;
} state;
// Rotates the unsigned 32-bit value x right by n bits. Both shift counts are
// reduced modulo 32 so that n == 0 no longer shifts by the full width (which
// is undefined behavior); results for n in 1..31 are unchanged.
// x and n are each evaluated twice.
#define ROTR32(x, n) (((x) >> ((n) & 31)) | ((x) << ((32 - (n)) & 31)))
// Index of the first round to execute when running the last x of 12 rounds.
#define START_ROUND(x) (12 - (x))
// Function form of to_big_immediate: returns the (h, l) word pair for `in`.
u32_2 to_big(u64 in);
// Function form of from_big_immediate: rebuilds the 64-bit value from `in`.
u64 from_big(u32_2 in);
// Stores the u64 expression `in` into `out` (a u32_2 lvalue): `in` is passed
// through U64BIG, then the upper 32 bits go to out.h and the lower 32 bits
// to out.l.
// Declares a local named `swapped`; `out` is used as `out.h` / `out.l`
// without extra parentheses.
#define to_big_immediate(out, in) \
  do {                            \
    u64 swapped = U64BIG(in);     \
    out.h = (u32)(swapped >> 32); \
    out.l = (u32)swapped;         \
  } while (0)
// Inverse of to_big_immediate: joins in.h (upper 32 bits) and in.l (lower 32
// bits) into one u64, passes it through U64BIG and stores the result in `out`.
// Declares a local named `joined`; `in` is used as `in.h` / `in.l` without
// extra parentheses.
#define from_big_immediate(out, in)        \
  do {                                     \
    u64 joined = ((u64)in.h << 32) | in.l; \
    out = U64BIG(joined);                  \
  } while (0)
// Applies the Ascon permutation to *p in place.
// NOTE(review): despite its name, `rounds` is used as the index of the first
// round to run: the callers in this chunk pass START_ROUND(PA_ROUNDS) and the
// assembly implementation counts from that value up to 12.
void P(state *p, u8 rounds);
#endif // PERMUTATIONS_H_
#include <xtensa/coreasm.h>
/*
 * Register aliases used by P.
 * a2 carries the state address on entry and becomes the scratch register
 * tmp0 once that address has been saved; a3 (rnd) is the round counter.
 */
#define tmp0 a2
#define rnd a3
/* the ten state words (high/low halves of x0..x4) live in a4..a13 */
#define x0h a4
#define x0l a5
#define x1h a6
#define x1l a7
#define x2h a8
#define x2l a9
#define x3h a10
#define x3l a11
#define x4h a12
#define x4l a13
/* a14/a15 are scratch: t0h/t0l in the linear layer, tmp1/tmp2 in the s-box */
#define t0h a14
#define t0l a15
#define tmp1 a14
#define tmp2 a15
/*
 * P -- Ascon permutation on a state stored as (high, low) 32-bit word pairs.
 * a2 is used as the address of the ten state words (x0h, x0l, ..., x4h, x4l
 * at byte offsets 0..36) and a3 (rnd) as the index of the first round.
 * Rounds run until rnd reaches 12; the state lives in a4..a13 during the loop
 * and is written back to the same address at .Lend.
 */
.section .text
.align 4
.global P
.type P,@function
P:
/* abi_entry/abi_return come from <xtensa/coreasm.h>; presumably the argument
   reserves the 4 bytes at a1+0 used below -- confirm for the ABI in use */
abi_entry 4
/* load the state into registers */
l32i x0h, a2, 0
l32i x0l, a2, 4
l32i x1h, a2, 8
l32i x1l, a2, 12
l32i x2h, a2, 16
l32i x2l, a2, 20
l32i x3h, a2, 24
l32i x3l, a2, 28
l32i x4h, a2, 32
l32i x4l, a2, 36
/* a2 doubles as tmp0 from here on, so park the state address at a1+0 */
s32i a2, a1, 0
.Lround_start:
/* round constant */
/* computed instead of looked up: ((15 - rnd) << 4) | rnd, XORed into x2l */
movi tmp0, 15
sub tmp0, tmp0, rnd
slli tmp0, tmp0, 4
or tmp0, tmp0, rnd
xor x2l, x2l, tmp0
/* s-box layer */
/* tmp0 = all ones for the whole s-box, so "xor r, tmp0, s" computes ~s */
movi tmp0, -1
/* high */
xor x0h, x0h, x4h
xor x4h, x4h, x3h
xor x2h, x2h, x1h
/* tmp2 = x0h & ~x4h, kept until it is XORed into x3h below */
xor tmp1, tmp0, x4h
and tmp2, tmp1, x0h
/* x0h ^= x2h & ~x1h */
xor tmp1, tmp0, x1h
and tmp1, tmp1, x2h
xor x0h, x0h, tmp1
/* x2h ^= x4h & ~x3h */
xor tmp1, tmp0, x3h
and tmp1, tmp1, x4h
xor x2h, x2h, tmp1
/* x4h ^= x1h & ~x0h */
xor tmp1, tmp0, x0h
and tmp1, tmp1, x1h
xor x4h, x4h, tmp1
/* x1h ^= x3h & ~x2h */
xor tmp1, tmp0, x2h
and tmp1, tmp1, x3h
xor x1h, x1h, tmp1
xor x3h, x3h, tmp2
xor x1h, x1h, x0h
xor x3h, x3h, x2h
xor x0h, x0h, x4h
/* x2h = ~x2h */
xor x2h, x2h, tmp0
/* low */
/* same sequence as above, applied to the low words */
xor x0l, x0l, x4l
xor x4l, x4l, x3l
xor x2l, x2l, x1l
xor tmp1, tmp0, x4l
and tmp2, tmp1, x0l
xor tmp1, tmp0, x1l
and tmp1, tmp1, x2l
xor x0l, x0l, tmp1
xor tmp1, tmp0, x3l
and tmp1, tmp1, x4l
xor x2l, x2l, tmp1
xor tmp1, tmp0, x0l
and tmp1, tmp1, x1l
xor x4l, x4l, tmp1
xor tmp1, tmp0, x2l
and tmp1, tmp1, x3l
xor x1l, x1l, tmp1
xor x3l, x3l, tmp2
xor x1l, x1l, x0l
xor x3l, x3l, x2l
xor x0l, x0l, x4l
xor x2l, x2l, tmp0
/* linear layer */
/* ssai sets the shift amount; each pair of src funnel shifts yields the low
   and high halves of a 64-bit rotate right of (xNh:xNl). Where the operands
   are swapped ("inverted"), the rotation is the ssai amount plus 32.
   Each word becomes x ^ ror(x, first) ^ ror(x, second); tmp0 is reused as
   scratch here and reloaded with -1 at the top of the next round. */
/* x0: rotations 19 and 28 */
ssai 19
src t0l, x0h, x0l
src t0h, x0l, x0h
xor t0l, t0l, x0l
xor t0h, t0h, x0h
ssai 28
src tmp0, x0h, x0l
src x0h, x0l, x0h
xor x0l, tmp0, t0l
xor x0h, x0h, t0h
/* x1: rotations 61 and 39 */
ssai 29 /* inverted 61 */
src t0l, x1l, x1h
src t0h, x1h, x1l
xor t0l, t0l, x1l
xor t0h, t0h, x1h
ssai 7 /* inverted 39 */
src tmp0, x1l, x1h
src x1h, x1h, x1l
xor x1l, tmp0, t0l
xor x1h, x1h, t0h
/* x2: rotations 1 and 6 */
ssai 1
src t0l, x2h, x2l
src t0h, x2l, x2h
xor t0l, t0l, x2l
xor t0h, t0h, x2h
ssai 6
src tmp0, x2h, x2l
src x2h, x2l, x2h
xor x2l, tmp0, t0l
xor x2h, x2h, t0h
/* x3: rotations 10 and 17 */
ssai 10
src t0l, x3h, x3l
src t0h, x3l, x3h
xor t0l, t0l, x3l
xor t0h, t0h, x3h
ssai 17
src tmp0, x3h, x3l
src x3h, x3l, x3h
xor x3l, tmp0, t0l
xor x3h, x3h, t0h
/* x4: rotations 7 and 41 */
ssai 7
src t0l, x4h, x4l
src t0h, x4l, x4h
xor t0l, t0l, x4l
xor t0h, t0h, x4h
ssai 9 /* inverted 41 */
src tmp0, x4l, x4h
src x4h, x4h, x4l
xor x4l, tmp0, t0l
xor x4h, x4h, t0h
/* loop */
/* NOTE(review): the test is at the bottom, so one round always executes,
   even when rnd is already 12 or more on entry */
addi rnd, rnd, 1
bltui rnd, 12, .Lround_start
.Lend:
/* reload the state address saved at entry and store the state back */
l32i a2, a1, 0
s32i x0h, a2, 0
s32i x0l, a2, 4
s32i x1h, a2, 8
s32i x1l, a2, 12
s32i x2h, a2, 16
s32i x2l, a2, 20
s32i x3h, a2, 24
s32i x3l, a2, 28
s32i x4h, a2, 32
s32i x4l, a2, 36
abi_return
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment