Commit 9c6d9e4a by Ferdinand Bachmann Committed by Sebastian Renner

ascon

parent 4063295f
/* Size parameters of this AEAD instance, all in bytes. */
/* Key length; the core loads two 64-bit words from the key buffer. */
#define CRYPTO_KEYBYTES 16
/* No secret message number is used; the nsec arguments are ignored. */
#define CRYPTO_NSECBYTES 0
/* Public nonce length; the core loads two 64-bit words from npub. */
#define CRYPTO_NPUBBYTES 16
/* Authentication tag length appended to the ciphertext. */
#define CRYPTO_ABYTES 16
/* NOTE(review): presumably signals to the surrounding harness that input and
 * output buffers do not overlap -- confirm against the harness documentation. */
#define CRYPTO_NOOVERLAP 1
#include <string.h>
#include "core.h"
/*
 * Absorb `len` bytes from `in` into the rate words x0/x1 of the interleaved
 * state `s`, and for the encrypt/decrypt modes also produce `len` bytes of
 * output in `out`.
 *
 *   s    - permutation state in bit-interleaved form, updated in place
 *   out  - destination buffer; only written when mode != ASCON_AD (the
 *          associated-data caller passes a null pointer)
 *   in   - source bytes (associated data, plaintext or ciphertext)
 *   len  - number of bytes in `in`
 *   mode - ASCON_AD, ASCON_ENC or ASCON_DEC
 *
 * Full RATE-sized blocks are each followed by the PB_ROUNDS permutation.
 * The final partial block (0..RATE-1 bytes) is padded with a single 0x80
 * byte followed by zeros and is NOT followed by a permutation here; the
 * caller applies the next one.
 *
 * All transfers between byte buffers and 64-bit words go through memcpy, so
 * the function does not depend on the alignment of `in` and `out`.
 */
void process_data(state* s, unsigned char* out, const unsigned char* in,
                  unsigned long long len, u8 mode) {
  u32_2 t0, t1;
  u64 tmp0, tmp1;

  while (len >= RATE) {
    /* absorb one full block */
    memcpy(&tmp0, in, sizeof tmp0);
    t0 = to_bit_interleaving_big(tmp0);
    s->x0.e ^= t0.e;
    s->x0.o ^= t0.o;
    memcpy(&tmp1, in + 8, sizeof tmp1);
    t1 = to_bit_interleaving_big(tmp1);
    s->x1.e ^= t1.e;
    s->x1.o ^= t1.o;
    if (mode != ASCON_AD) {
      /* state XOR input is the output block */
      tmp0 = from_bit_interleaving_big(s->x0);
      memcpy(out, &tmp0, sizeof tmp0);
      tmp1 = from_bit_interleaving_big(s->x1);
      memcpy(out + 8, &tmp1, sizeof tmp1);
      /* only advance a pointer that is actually in use */
      out += RATE;
    }
    if (mode == ASCON_DEC) {
      /* when decrypting, the rate is replaced by the ciphertext block */
      s->x0 = t0;
      s->x1 = t1;
    }
    P(s, START_ROUND(PB_ROUNDS));
    in += RATE;
    len -= RATE;
  }

  /* final block: remaining bytes, then 0x80, then zero fill */
  u8 bytes[16];
  memcpy(bytes, in, len);
  memset(bytes + len, 0, sizeof bytes - len);
  bytes[len] ^= 0x80;
  memcpy(&tmp0, bytes, sizeof tmp0);
  memcpy(&tmp1, bytes + 8, sizeof tmp1);
  t0 = to_bit_interleaving_big(tmp0);
  s->x0.e ^= t0.e;
  s->x0.o ^= t0.o;
  t1 = to_bit_interleaving_big(tmp1);
  s->x1.e ^= t1.e;
  s->x1.o ^= t1.o;
  if (mode != ASCON_AD) {
    tmp0 = from_bit_interleaving_big(s->x0);
    tmp1 = from_bit_interleaving_big(s->x1);
    memcpy(bytes, &tmp0, sizeof tmp0);
    memcpy(bytes + 8, &tmp1, sizeof tmp1);
    /* only the first `len` bytes are real output */
    memcpy(out, bytes, len);
  }
  if (mode == ASCON_DEC) {
    /* overwrite the first `len` rate bytes with the ciphertext; the bytes
     * beyond `len` keep the padded state computed above */
    memcpy(bytes, in, len);
    memcpy(&tmp0, bytes, sizeof tmp0);
    memcpy(&tmp1, bytes + 8, sizeof tmp1);
    s->x0 = to_bit_interleaving_big(tmp0);
    s->x1 = to_bit_interleaving_big(tmp1);
  }
}
/*
 * Run the AEAD core: initialise the state from key and nonce, absorb the
 * associated data, process the message, and finalise.
 *
 *   s     - state to initialise and run; on return x3/x4 hold the tag in
 *           bit-interleaved form
 *   out   - receives `tlen` bytes of ciphertext (ASCON_ENC) or plaintext
 *           (ASCON_DEC)
 *   in    - `tlen` bytes of plaintext (ASCON_ENC) or ciphertext (ASCON_DEC)
 *   ad    - associated data, `adlen` bytes
 *   npub  - 16-byte public nonce
 *   k     - 16-byte key
 *   mode  - ASCON_ENC or ASCON_DEC
 *
 * Key and nonce words are fetched with memcpy so that `k` and `npub` need no
 * particular alignment.
 */
void ascon_core(state* s, unsigned char* out, const unsigned char* in,
                unsigned long long tlen, const unsigned char* ad,
                unsigned long long adlen, const unsigned char* npub,
                const unsigned char* k, u8 mode) {
  u32_2 K0, K1, N0, N1;
  u64 word;

  // load key and nonce
  memcpy(&word, k, sizeof word);
  K0 = to_bit_interleaving_big(word);
  memcpy(&word, k + 8, sizeof word);
  K1 = to_bit_interleaving_big(word);
  memcpy(&word, npub, sizeof word);
  N0 = to_bit_interleaving_big(word);
  memcpy(&word, npub + 8, sizeof word);
  N1 = to_bit_interleaving_big(word);

  // initialization: state = IV || key || nonce, permute, then XOR the key in
  to_bit_interleaving_big_immediate(s->x0, IV);
  s->x1.o = K0.o;
  s->x1.e = K0.e;
  s->x2.e = K1.e;
  s->x2.o = K1.o;
  s->x3.e = N0.e;
  s->x3.o = N0.o;
  s->x4.e = N1.e;
  s->x4.o = N1.o;
  P(s, START_ROUND(PA_ROUNDS));
  s->x3.e ^= K0.e;
  s->x3.o ^= K0.o;
  s->x4.e ^= K1.e;
  s->x4.o ^= K1.o;

  // process associated data (skipped entirely when there is none)
  if (adlen) {
    process_data(s, (void*)0, ad, adlen, ASCON_AD);
    // process_data leaves the last block unpermuted
    P(s, START_ROUND(PB_ROUNDS));
  }
  // flip the lowest even bit of x4 to separate associated data from message
  s->x4.e ^= 1;

  // process plaintext/ciphertext
  process_data(s, out, in, tlen, mode);

  // finalization: XOR key, permute, XOR key again; tag ends up in x3/x4
  s->x2.e ^= K0.e;
  s->x2.o ^= K0.o;
  s->x3.e ^= K1.e;
  s->x3.o ^= K1.o;
  P(s, START_ROUND(PA_ROUNDS));
  s->x3.e ^= K0.e;
  s->x3.o ^= K0.o;
  s->x4.e ^= K1.e;
  s->x4.o ^= K1.o;
}
/* Shared declarations for the AEAD core: mode selectors, rate and round
 * parameters, the initialisation vector, and the core entry points. */
#ifndef CORE_H_
#define CORE_H_
#include "api.h"
#include "endian.h"
#include "permutations.h"
/* Values for the `mode` argument of process_data() and ascon_core(). */
#define ASCON_AD 0
#define ASCON_ENC 1
#define ASCON_DEC 2
/* Block size in bytes absorbed per permutation call (128 bits). */
#define RATE (128 / 8)
/* Number of permutation rounds used for initialisation/finalisation (PA)
 * and between data blocks (PB). */
#define PA_ROUNDS 12
#define PB_ROUNDS 8
/* Initialisation value for state word x0: key size in bits, rate in bits,
 * PA_ROUNDS and PB_ROUNDS packed into bytes 0..3 (least significant first).
 * NOTE(review): this constant is passed through U64BIG by
 * to_bit_interleaving_big_immediate; on a big-endian target U64BIG is the
 * identity, so confirm the byte placement is what is intended there. */
#define IV \
((u64)(8 * (CRYPTO_KEYBYTES)) << 0 | (u64)(8 * (RATE)) << 8 | \
(u64)(PA_ROUNDS) << 16 | (u64)(PB_ROUNDS) << 24)
/* Absorb `len` bytes of `in` into state `s`; writes `out` unless mode is
 * ASCON_AD. */
void process_data(state* s, unsigned char* out, const unsigned char* in,
unsigned long long len, u8 mode);
/* Run initialisation, associated-data and message processing and
 * finalisation; leaves the tag in s->x3 / s->x4. */
void ascon_core(state* s, unsigned char* out, const unsigned char* in,
unsigned long long tlen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k, u8 mode);
#endif // CORE_H_
#include "core.h"
int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen,
unsigned char* nsec, const unsigned char* c,
unsigned long long clen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k) {
if (clen < CRYPTO_ABYTES) {
*mlen = 0;
return -1;
}
state s;
u32_2 t0, t1;
(void)nsec;
// set plaintext size
*mlen = clen - CRYPTO_ABYTES;
ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DEC);
// verify tag (should be constant time, check compiler output)
t0 = to_bit_interleaving_big((*(u64*)(c + *mlen)));
t1 = to_bit_interleaving_big((*(u64*)(c + *mlen + 8)));
if (((s.x3.e ^ t0.e) | (s.x3.o ^ t0.o) | (s.x4.e ^ t1.e) | (s.x4.o ^ t1.o)) !=
0) {
*mlen = 0;
return -1;
}
return 0;
}
#include "core.h"
/*
 * Encrypt `mlen` bytes at `m` and append a CRYPTO_ABYTES authentication tag.
 *
 *   c, clen - receive the ciphertext-plus-tag and its length
 *             (mlen + CRYPTO_ABYTES); `c` must have room for that many bytes
 *   ad      - associated data, `adlen` bytes
 *   nsec    - unused
 *   npub    - public nonce
 *   k       - key
 *
 * Always returns 0.
 *
 * The tag is written at c + mlen, which has arbitrary alignment, so it is
 * stored byte by byte instead of through a casted 64-bit pointer.
 */
int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen,
                        const unsigned char* m, unsigned long long mlen,
                        const unsigned char* ad, unsigned long long adlen,
                        const unsigned char* nsec, const unsigned char* npub,
                        const unsigned char* k) {
  state s;
  u64 tmp0, tmp1;
  const unsigned char* tag0_bytes = (const unsigned char*)&tmp0;
  const unsigned char* tag1_bytes = (const unsigned char*)&tmp1;
  unsigned char* tag;
  unsigned i;
  (void)nsec;

  // set ciphertext size
  *clen = mlen + CRYPTO_ABYTES;
  ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENC);

  // set tag: x3/x4 converted back from interleaved form, in memory order
  tmp0 = from_bit_interleaving_big(s.x3);
  tmp1 = from_bit_interleaving_big(s.x4);
  tag = c + mlen;
  for (i = 0; i < 8; ++i) {
    tag[i] = tag0_bytes[i];
    tag[8 + i] = tag1_bytes[i];
  }
  return 0;
}
/* Byte-order helpers: UxxBIG(x) converts between the host representation of
 * an integer and its big-endian representation (the operation is its own
 * inverse). */
#ifndef ENDIAN_H_
#define ENDIAN_H_
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
// macros for big endian machines
// (host order already is big-endian, so these are the identity)
#define U64BIG(x) (x)
#define U32BIG(x) (x)
#define U16BIG(x) (x)
#elif defined(_MSC_VER) || \
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
// macros for little endian machines
// (_MSC_VER is treated as little endian without a further check)
// Each macro reverses the byte order of its argument. The argument is
// expanded once per byte, so it must not have side effects.
#define U64BIG(x) \
((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) | \
(((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) | \
(((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) | \
(((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56))
#define U32BIG(x) \
((((x)&0x000000FF) << 24) | (((x)&0x0000FF00) << 8) | \
(((x)&0x00FF0000) >> 8) | (((x)&0xFF000000) >> 24))
#define U16BIG(x) ((((x)&0x00FF) << 8) | (((x)&0xFF00) >> 8))
#else
// neither byte order could be determined at compile time: refuse to build
#error "ascon byte order macros not defined in endian.h"
#endif
#endif // ENDIAN_H_
Christoph Dobraunig
Martin Schläffer
Ferdinand Bachmann
#include "endian.h"
#include "permutations.h"
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
// Function form of to_bit_interleaving_big_immediate: converts a 64-bit word
// as loaded from memory into its even/odd bit-interleaved pair.
u32_2 to_bit_interleaving_big(u64 in) {
  u32_2 interleaved;
  to_bit_interleaving_big_immediate(interleaved, in);
  return interleaved;
}
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
// Function form of from_bit_interleaving_big_immediate: converts an even/odd
// bit-interleaved pair back into a 64-bit word ready to be stored to memory.
u64 from_bit_interleaving_big(u32_2 in) {
  u64 word;
  from_bit_interleaving_big_immediate(word, in);
  return word;
}
/*
void P(state *p, u8 start_round) {
// implemented in asm
}
*/
#ifndef PERMUTATIONS_H_
#define PERMUTATIONS_H_
/* Short integer aliases used throughout the implementation. */
typedef unsigned char u8;
/* NOTE(review): assumed to be exactly 32 bits wide; the masks, shifts and
 * rotations in this header depend on it -- confirm for the target. */
typedef unsigned int u32;
typedef unsigned long long u64;
/* One 64-bit word in bit-interleaved form: `e` collects the even-numbered
 * bits and `o` the odd-numbered bits (as produced by
 * to_bit_interleaving_big_immediate). */
typedef struct {
u32 e;
u32 o;
} u32_2;
/* The permutation state: five interleaved words x0..x4, laid out as ten
 * consecutive u32 values (the assembly P accesses them at offsets 0..36). */
typedef struct {
u32_2 x0;
u32_2 x1;
u32_2 x2;
u32_2 x3;
u32_2 x4;
} state;
/* Rotate the 32-bit value x right by n bits. n must be in 1..31: n == 0
 * would shift left by 32, which is undefined for a 32-bit operand. */
#define ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
/* Convert a round count x into the index of the first round to execute out
 * of 12, e.g. 12 rounds -> start at 0, 8 rounds -> start at 4. */
#define START_ROUND(x) (12 - (x))
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
// Returns the bit-interleaved (even/odd) form of a 64-bit word loaded from
// memory; function wrapper around to_bit_interleaving_big_immediate.
u32_2 to_bit_interleaving_big(u64 in);
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
// Inverse of to_bit_interleaving_big; function wrapper around
// from_bit_interleaving_big_immediate.
u64 from_bit_interleaving_big(u32_2 in);
/*
 * Convert the 64-bit value `in` to bit-interleaved form and store it in the
 * u32_2 lvalue `out`.
 *
 * `in` is first passed through U64BIG. Each 32-bit half is then permuted so
 * that its even-numbered bits land in its low 16 bits and its odd-numbered
 * bits in its high 16 bits; finally the even parts of both halves are joined
 * into out.e and the odd parts into out.o.
 *
 * `out` may be any lvalue expression of type u32_2 (it is parenthesized, so
 * forms such as `*ptr` work). `in` should be free of side effects because
 * U64BIG may expand its argument several times. The macro declares locals
 * named big_in, hi, lo, r0 and r1; `out` must not refer to variables with
 * those names.
 */
#define to_bit_interleaving_big_immediate(out, in)               \
  do {                                                           \
    u64 big_in = U64BIG(in);                                     \
    u32 hi = (u32)(big_in >> 32);                                \
    u32 lo = (u32)(big_in);                                      \
    u32 r0, r1;                                                  \
    r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1);    \
    r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2);    \
    r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4);    \
    r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8);    \
    r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1);    \
    r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2);    \
    r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4);    \
    r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8);    \
    (out).e = (lo & 0x0000FFFF) | (hi << 16);                    \
    (out).o = (lo >> 16) | (hi & 0xFFFF0000);                    \
  } while (0)
/*
 * Inverse of to_bit_interleaving_big_immediate: rebuild a 64-bit value from
 * the bit-interleaved u32_2 `in` and store it in the u64 lvalue `out`.
 *
 * The even and odd parts are first regrouped into a low and a high 32-bit
 * half, each half is de-interleaved by running the swap steps in reverse
 * order, and the recombined word is passed through U64BIG.
 *
 * Both arguments are parenthesized, so expressions such as `*ptr` can be
 * used. They are expanded more than once and should be free of side effects.
 * The macro declares locals named hi, lo, r0 and r1; the arguments must not
 * refer to variables with those names.
 */
#define from_bit_interleaving_big_immediate(out, in)             \
  do {                                                           \
    u32 lo = ((in).e & 0x0000FFFF) | ((in).o << 16);             \
    u32 hi = ((in).e >> 16) | ((in).o & 0xFFFF0000);             \
    u32 r0, r1;                                                  \
    r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8);    \
    r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4);    \
    r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2);    \
    r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1);    \
    r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8);    \
    r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4);    \
    r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2);    \
    r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1);    \
    (out) = (u64)hi << 32 | lo;                                  \
    (out) = U64BIG(out);                                         \
  } while (0)
/* clang-format off */
/*
 * One permutation round on the bit-interleaved state, written in C.
 * Expects two variables in the expanding scope: `s`, a `state` object (not a
 * pointer), and `t0`, a `u32_2` scratch value. C_e and C_o are the round
 * constant parts XORed into the even and odd half of x2.
 * The sequence is: add round constant, substitution layer, linear layer
 * (rotations and XORs per word), and finally complement x2.
 * NOTE(review): no use of this macro is visible in this source; the P that is
 * called is implemented in assembly -- confirm whether the macro is still needed.
 */
#define ROUND(C_e, C_o) \
do { \
/* round constant */ \
s.x2.e ^= C_e; s.x2.o ^= C_o; \
/* s-box layer */ \
s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \
s.x4.e ^= s.x3.e; s.x4.o ^= s.x3.o; \
s.x2.e ^= s.x1.e; s.x2.o ^= s.x1.o; \
t0.e = s.x0.e & (~s.x4.e); t0.o = s.x0.o & (~s.x4.o); \
s.x0.e ^= s.x2.e & (~s.x1.e); s.x0.o ^= s.x2.o & (~s.x1.o); \
s.x2.e ^= s.x4.e & (~s.x3.e); s.x2.o ^= s.x4.o & (~s.x3.o); \
s.x4.e ^= s.x1.e & (~s.x0.e); s.x4.o ^= s.x1.o & (~s.x0.o); \
s.x1.e ^= s.x3.e & (~s.x2.e); s.x1.o ^= s.x3.o & (~s.x2.o); \
s.x3.e ^= t0.e; s.x3.o ^= t0.o; \
s.x1.e ^= s.x0.e; s.x1.o ^= s.x0.o; \
s.x3.e ^= s.x2.e; s.x3.o ^= s.x2.o; \
s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \
/* linear layer */ \
t0.e = s.x0.e ^ ROTR32(s.x0.o, 4); \
t0.o = s.x0.o ^ ROTR32(s.x0.e, 5); \
s.x0.e ^= ROTR32(t0.o, 9); \
s.x0.o ^= ROTR32(t0.e, 10); \
t0.e = s.x1.e ^ ROTR32(s.x1.e, 11); \
t0.o = s.x1.o ^ ROTR32(s.x1.o, 11); \
s.x1.e ^= ROTR32(t0.o, 19); \
s.x1.o ^= ROTR32(t0.e, 20); \
t0.e = s.x2.e ^ ROTR32(s.x2.o, 2); \
t0.o = s.x2.o ^ ROTR32(s.x2.e, 3); \
s.x2.e ^= t0.o; \
s.x2.o ^= ROTR32(t0.e, 1); \
t0.e = s.x3.e ^ ROTR32(s.x3.o, 3); \
t0.o = s.x3.o ^ ROTR32(s.x3.e, 4); \
s.x3.e ^= ROTR32(t0.e, 5); \
s.x3.o ^= ROTR32(t0.o, 5); \
t0.e = s.x4.e ^ ROTR32(s.x4.e, 17); \
t0.o = s.x4.o ^ ROTR32(s.x4.o, 17); \
s.x4.e ^= ROTR32(t0.o, 3); \
s.x4.o ^= ROTR32(t0.e, 4); \
s.x2.e = ~s.x2.e; s.x2.o = ~s.x2.o; \
} while(0)
/* clang-format on */
/*
 * Apply the permutation to the bit-interleaved state `p` in place
 * (implemented in assembly).
 * NOTE(review): despite its name, the visible assembly and the callers that
 * pass START_ROUND(...) treat `rounds` as the index of the first round to
 * run (0..11); rounds are executed from that index up to round 11.
 */
void P(state *p, u8 rounds);
#endif // PERMUTATIONS_H_
#include <xtensa/coreasm.h>
/*
 * Round-constant table used by P: twelve byte pairs, one pair per round
 * index 0..11. For each round P XORs the first byte of the pair into x2.e
 * and the second byte into x2.o.
 */
.section .data
.align 2
.global ascon_round_constants
.type ascon_round_constants,@object
ascon_round_constants:
.byte 0xc, 0xc
.byte 0x9, 0xc
.byte 0xc, 0x9
.byte 0x9, 0x9
.byte 0x6, 0xc
.byte 0x3, 0xc
.byte 0x6, 0x9
.byte 0x3, 0x9
.byte 0xc, 0x6
.byte 0x9, 0x6
.byte 0xc, 0x3
.byte 0x9, 0x3
/*
 * Register allocation for P: the ten 32-bit state halves live in a4..a13
 * for the whole function (xNe = even half, xNo = odd half of word xN).
 */
#define x0e a4
#define x0o a5
#define x1e a6
#define x1o a7
#define x2e a8
#define x2o a9
#define x3e a10
#define x3o a11
#define x4e a12
#define x4o a13
/* Scratch registers. t0e/tmp and t0o/neg are two names for the same
 * registers (a14 and a15); the names only reflect the role in each phase. */
#define t0e a14
#define t0o a15
#define tmp a14
#define neg a15
/*
 * P: permutation on the bit-interleaved state.
 *
 * On entry a2 points to the state (ten 32-bit values at offsets 0..36, in
 * the order x0.e, x0.o, ..., x4.e, x4.o) and a3 holds the index of the first
 * round to execute. Rounds run from that index up to and including round 11.
 * The state is loaded into registers, transformed, and stored back.
 *
 * NOTE(review): the loop tests its condition at the bottom, so one round is
 * always executed; a start index of 12 or more would read past the end of
 * ascon_round_constants. The visible C callers pass START_ROUND(...) values.
 * NOTE(review): abi_entry/abi_return presumably come from xtensa/coreasm.h;
 * a12..a15 are modified without being saved here -- confirm this is fine
 * for the ABI in use.
 */
.section .text
.align 4
.global P
.type P,@function
P:
abi_entry 4
/* load the whole state into registers */
l32i x0e, a2, 0
l32i x0o, a2, 4
l32i x1e, a2, 8
l32i x1o, a2, 12
l32i x2e, a2, 16
l32i x2o, a2, 20
l32i x3e, a2, 24
l32i x3o, a2, 28
l32i x4e, a2, 32
l32i x4o, a2, 36
/* save the state pointer in the stack slot so a2 can be reused */
s32i a2, a1, 0
/* a2 = &ascon_round_constants[2 * start_round] (two bytes per round) */
movi a2, ascon_round_constants
addx2 a2, a3, a2
.Lround_start:
/* round constant */
/* a14/a15 are t0e/t0o: first byte goes into x2.e, second into x2.o */
l8ui t0e, a2, 0
xor x2e, x2e, a14
l8ui t0o, a2, 1
xor x2o, x2o, a15
/* s-box layer */
/* Each "movi neg, -1; xor neg, neg, r" pair computes ~r; the following
 * "and" then forms (~r & other). */
/* even */
xor x0e, x0e, x4e
xor x4e, x4e, x3e
xor x2e, x2e, x1e
/* tmp = x0 & ~x4, kept until it is XORed into x3 below */
movi neg, -1
xor neg, neg, x4e
and tmp, neg, x0e
/* x0 ^= x2 & ~x1 */
movi neg, -1
xor neg, neg, x1e
and neg, neg, x2e
xor x0e, x0e, neg
/* x2 ^= x4 & ~x3 */
movi neg, -1
xor neg, neg, x3e
and neg, neg, x4e
xor x2e, x2e, neg
/* x4 ^= x1 & ~x0 */
movi neg, -1
xor neg, neg, x0e
and neg, neg, x1e
xor x4e, x4e, neg
/* x1 ^= x3 & ~x2 */
movi neg, -1
xor neg, neg, x2e
and neg, neg, x3e
xor x1e, x1e, neg
xor x3e, x3e, tmp
xor x1e, x1e, x0e
xor x3e, x3e, x2e
xor x0e, x0e, x4e
/* odd */
/* same sequence as above, applied to the odd halves */
xor x0o, x0o, x4o
xor x4o, x4o, x3o
xor x2o, x2o, x1o
movi neg, -1
xor neg, neg, x4o
and tmp, neg, x0o
movi neg, -1
xor neg, neg, x1o
and neg, neg, x2o
xor x0o, x0o, neg
movi neg, -1
xor neg, neg, x3o
and neg, neg, x4o
xor x2o, x2o, neg
movi neg, -1
xor neg, neg, x0o
and neg, neg, x1o
xor x4o, x4o, neg
movi neg, -1
xor neg, neg, x2o
and neg, neg, x3o
xor x1o, x1o, neg
xor x3o, x3o, tmp
xor x1o, x1o, x0o
xor x3o, x3o, x2o
xor x0o, x0o, x4o
/* linear layer */
/* "ssai n" followed by "src d, r, r" rotates r right by n bits into d; the
 * rotation amounts below are the same as in the C ROUND macro. */
/* x0 */
ssai 4
src t0e, x0o, x0o
xor t0e, t0e, x0e
ssai 5
src t0o, x0e, x0e
xor t0o, t0o, x0o
ssai 9
src t0o, t0o, t0o
xor x0e, x0e, t0o
ssai 10
src t0e, t0e, t0e
xor x0o, x0o, t0e
/* x1 (both first-stage rotations share the shift amount 11) */
ssai 11
src t0e, x1e, x1e
xor t0e, t0e, x1e
src t0o, x1o, x1o
xor t0o, t0o, x1o
ssai 19
src t0o, t0o, t0o
xor x1e, x1e, t0o
ssai 20
src t0e, t0e, t0e
xor x1o, x1o, t0e
/* x2 (t0o is used unrotated for the even half) */
ssai 2
src t0e, x2o, x2o
xor t0e, t0e, x2e
ssai 3
src t0o, x2e, x2e
xor t0o, t0o, x2o
xor x2e, x2e, t0o
ssai 1
src t0e, t0e, t0e
xor x2o, x2o, t0e
/* x3 (both second-stage rotations share the shift amount 5) */
ssai 3
src t0e, x3o, x3o
xor t0e, t0e, x3e
ssai 4
src t0o, x3e, x3e
xor t0o, t0o, x3o
ssai 5
src t0e, t0e, t0e
xor x3e, x3e, t0e
src t0o, t0o, t0o
xor x3o, x3o, t0o
/* x4 (both first-stage rotations share the shift amount 17) */
ssai 17
src t0e, x4e, x4e
xor t0e, t0e, x4e
src t0o, x4o, x4o
xor t0o, t0o, x4o
ssai 3
src t0o, t0o, t0o
xor x4e, x4e, t0o
ssai 4
src t0e, t0e, t0e
xor x4o, x4o, t0e
/* complement x2 */
movi neg, -1
xor x2e, x2e, neg
xor x2o, x2o, neg
/* loop */
/* advance to the next constant pair and round index; stop after round 11 */
addi a2, a2, 2
addi a3, a3, 1
bltui a3, 12, .Lround_start
.Lend:
/* reload the state pointer saved at entry and write the state back */
l32i a2, a1, 0
s32i x0e, a2, 0
s32i x0o, a2, 4
s32i x1e, a2, 8
s32i x1o, a2, 12
s32i x2e, a2, 16
s32i x2o, a2, 20
s32i x3e, a2, 24
s32i x3o, a2, 28
s32i x4e, a2, 32
s32i x4o, a2, 36
abi_return
/* Size parameters of this AEAD instance, all in bytes. */
/* Key length. */
#define CRYPTO_KEYBYTES 16
/* Length of the secret message number; zero here. */
#define CRYPTO_NSECBYTES 0
/* Public nonce length. */
#define CRYPTO_NPUBBYTES 16
/* NOTE(review): presumably the authentication tag length -- confirm against
 * the encrypt/decrypt entry points of this variant. */
#define CRYPTO_ABYTES 16
/* NOTE(review): presumably signals to the surrounding harness that input and
 * output buffers do not overlap -- confirm against the harness documentation. */
#define CRYPTO_NOOVERLAP 1
#include "core.h"
/*
 * Absorb `len` bytes from `in` into the rate words x0/x1 of the interleaved
 * state `s`, and for the encrypt/decrypt modes also produce `len` bytes of
 * output in `out`.
 *
 *   s    - permutation state in bit-interleaved form, updated in place
 *   out  - destination buffer; only written when mode != ASCON_AD
 *   in   - source bytes (associated data, plaintext or ciphertext)
 *   len  - number of bytes in `in`
 *   mode - ASCON_AD, ASCON_ENC or ASCON_DEC
 *
 * Full RATE-sized blocks are each followed by a call to P. The final partial
 * block is assembled byte by byte, padded with a single 0x80 byte, and is not
 * followed by a permutation here.
 *
 * Full blocks are moved between the byte buffers and the 64-bit temporaries
 * one byte at a time (through unsigned char views of the temporaries), so the
 * function does not depend on the alignment of `in` and `out`.
 *
 * NOTE(review): INS_BYTE64, EXT_BYTE64, to_bit_interleaving and
 * from_bit_interleaving are not defined in the visible source; they are
 * assumed to insert/extract byte i of a 64-bit word and to convert to/from
 * the interleaved form -- confirm their byte numbering. P is called with
 * PB_ROUNDS directly here -- confirm this matches the P in use.
 */
void process_data(state* s, unsigned char* out, const unsigned char* in,
                  unsigned long long len, u8 mode) {
  u32_2 t0, t1;
  u64 tmp0, tmp1;
  u64 i;
  /* byte views of the temporaries, used for memory-order loads and stores */
  unsigned char* tmp0_bytes = (unsigned char*)&tmp0;
  unsigned char* tmp1_bytes = (unsigned char*)&tmp1;

  while (len >= RATE) {
    /* load one full block in memory order */
    for (i = 0; i < 8; ++i) {
      tmp0_bytes[i] = in[i];
      tmp1_bytes[i] = in[i + 8];
    }
    t0 = to_bit_interleaving_big(tmp0);
    s->x0.e ^= t0.e;
    s->x0.o ^= t0.o;
    t1 = to_bit_interleaving_big(tmp1);
    s->x1.e ^= t1.e;
    s->x1.o ^= t1.o;
    if (mode != ASCON_AD) {
      /* state XOR input is the output block */
      tmp0 = from_bit_interleaving_big(s->x0);
      tmp1 = from_bit_interleaving_big(s->x1);
      for (i = 0; i < 8; ++i) {
        out[i] = tmp0_bytes[i];
        out[i + 8] = tmp1_bytes[i];
      }
      /* only advance a pointer that is actually in use */
      out += RATE;
    }
    if (mode == ASCON_DEC) {
      /* when decrypting, the rate is replaced by the ciphertext block */
      s->x0 = t0;
      s->x1 = t1;
    }
    P(s, PB_ROUNDS);
    in += RATE;
    len -= RATE;
  }

  /* final block: gather the remaining bytes into two 64-bit words */
  tmp0 = 0;
  tmp1 = 0;
  for (i = 0; i < len; ++i, ++in) {
    if (i < 8) {
      tmp0 ^= INS_BYTE64(*in, i);
    } else {
      tmp1 ^= INS_BYTE64(*in, i % 8);
    }
  }
  /* rewind: the decrypt branch below reads the same bytes again */
  in -= len;
  /* padding byte directly after the last data byte */
  if (len < 8) {
    tmp0 ^= INS_BYTE64(0x80, len);
  } else {
    tmp1 ^= INS_BYTE64(0x80, len % 8);
  }
  t0 = to_bit_interleaving(tmp0);
  s->x0.e ^= t0.e;
  s->x0.o ^= t0.o;
  t1 = to_bit_interleaving(tmp1);
  s->x1.e ^= t1.e;
  s->x1.o ^= t1.o;
  if (mode != ASCON_AD) {
    tmp0 = from_bit_interleaving(s->x0);
    tmp1 = from_bit_interleaving(s->x1);
    /* only the first `len` bytes are real output */
    for (i = 0; i < len; ++i, ++out) {
      if (i < 8) {
        *out = EXT_BYTE64(tmp0, i);
      } else {
        *out = EXT_BYTE64(tmp1, i % 8);
      }
    }
  }
  if (mode == ASCON_DEC) {
    /* replace the first `len` rate bytes by the ciphertext bytes; the
     * remaining bytes keep the padded state computed above */
    for (i = 0; i < len; ++i, ++in) {
      if (i < 8) {
        tmp0 &= ~INS_BYTE64(0xff, i);
        tmp0 |= INS_BYTE64(*in, i);
      } else {
        tmp1 &= ~INS_BYTE64(0xff, i % 8);
        tmp1 |= INS_BYTE64(*in, i % 8);
      }
    }
    s->x0 = to_bit_interleaving(tmp0);
    s->x1 = to_bit_interleaving(tmp1);
  }
}
void ascon_core(state* s, unsigned char* out, const unsigned char* in,
unsigned long long tlen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k, u8 mode) {
u32_2 K0, K1, N0, N1;
// load key and nonce
K0 = to_bit_interleaving_big((*(u64*)k));
K1 = to_bit_interleaving_big((*(u64*)(k + 8)));
N0 = to_bit_interleaving_big((*(u64*)npub));
N1 = to_bit_interleaving_big((*(u64*)(npub + 8)));
// initialization
s->x0 = to_bit_interleaving(IV);
s->x1.o = K0.o;
s->x1.e = K0.e;
s->x2.e = K1.e;
s->x2.o = K1.o;
s->x3.e = N0.e;
s->x3.o = N0.o;
s->x4.e = N1.e;
s->x4.o = N1.o;