Commit 694fe7f9 by Enrico Pozzobon

Merge branch 'master' into patched-for-tester

parents 7d44261a 41412efd
// AEAD parameter set of this implementation; all sizes are in bytes.
// KEYBYTES: secret key, NSECBYTES: secret message number (unused, 0),
// NPUBBYTES: public nonce, ABYTES: tag appended to the ciphertext.
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
// NOTE(review): presumably tells the harness that input and output buffers
// must not overlap -- confirm against the benchmarking framework.
#define CRYPTO_NOOVERLAP 1
#include "core.h"
void process_data(state* s, unsigned char* out, const unsigned char* in,
unsigned long long len, u8 mode) {
u32_2 t0, t1;
u64 tmp0, tmp1;
u64 i;
while (len >= RATE) {
tmp0 = U64BIG(*(u64*)in);
t0 = to_bit_interleaving(tmp0);
s->x0.e ^= t0.e;
s->x0.o ^= t0.o;
tmp1 = U64BIG(*(u64*)(in + 8));
t1 = to_bit_interleaving(tmp1);
s->x1.e ^= t1.e;
s->x1.o ^= t1.o;
if (mode != ASCON_AD) {
tmp0 = from_bit_interleaving(s->x0);
*(u64*)out = U64BIG(tmp0);
tmp1 = from_bit_interleaving(s->x1);
*(u64*)(out + 8) = U64BIG(tmp1);
}
if (mode == ASCON_DEC) {
s->x0 = t0;
s->x1 = t1;
}
P(s, PB_ROUNDS);
in += RATE;
out += RATE;
len -= RATE;
}
tmp0 = 0;
tmp1 = 0;
for (i = 0; i < len; ++i, ++in)
if (i < 8)
tmp0 ^= INS_BYTE64(*in, i);
else
tmp1 ^= INS_BYTE64(*in, i % 8);
in -= len;
if (len < 8)
tmp0 ^= INS_BYTE64(0x80, len);
else
tmp1 ^= INS_BYTE64(0x80, len % 8);
t0 = to_bit_interleaving(tmp0);
s->x0.e ^= t0.e;
s->x0.o ^= t0.o;
t1 = to_bit_interleaving(tmp1);
s->x1.e ^= t1.e;
s->x1.o ^= t1.o;
if (mode != ASCON_AD) {
tmp0 = from_bit_interleaving(s->x0);
tmp1 = from_bit_interleaving(s->x1);
for (i = 0; i < len; ++i, ++out)
if (i < 8)
*out = EXT_BYTE64(tmp0, i);
else
*out = EXT_BYTE64(tmp1, i % 8);
}
if (mode == ASCON_DEC) {
for (i = 0; i < len; ++i, ++in)
if (i < 8) {
tmp0 &= ~INS_BYTE64(0xff, i);
tmp0 |= INS_BYTE64(*in, i);
} else {
tmp1 &= ~INS_BYTE64(0xff, i % 8);
tmp1 |= INS_BYTE64(*in, i % 8);
}
s->x0 = to_bit_interleaving(tmp0);
s->x1 = to_bit_interleaving(tmp1);
}
}
void ascon_core(state* s, unsigned char* out, const unsigned char* in,
unsigned long long tlen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k, u8 mode) {
u32_2 K0, K1, N0, N1;
// load key and nonce
K0 = to_bit_interleaving(U64BIG(*(u64*)k));
K1 = to_bit_interleaving(U64BIG(*(u64*)(k + 8)));
N0 = to_bit_interleaving(U64BIG(*(u64*)npub));
N1 = to_bit_interleaving(U64BIG(*(u64*)(npub + 8)));
// initialization
s->x0 = to_bit_interleaving(IV);
s->x1.o = K0.o;
s->x1.e = K0.e;
s->x2.e = K1.e;
s->x2.o = K1.o;
s->x3.e = N0.e;
s->x3.o = N0.o;
s->x4.e = N1.e;
s->x4.o = N1.o;
P(s, PA_ROUNDS);
s->x3.e ^= K0.e;
s->x3.o ^= K0.o;
s->x4.e ^= K1.e;
s->x4.o ^= K1.o;
// process associated data
if (adlen) {
process_data(s, (void*)0, ad, adlen, ASCON_AD);
P(s, PB_ROUNDS);
}
s->x4.e ^= 1;
// process plaintext/ciphertext
process_data(s, out, in, tlen, mode);
// finalization
s->x2.e ^= K0.e;
s->x2.o ^= K0.o;
s->x3.e ^= K1.e;
s->x3.o ^= K1.o;
P(s, PA_ROUNDS);
s->x3.e ^= K0.e;
s->x3.o ^= K0.o;
s->x4.e ^= K1.e;
s->x4.o ^= K1.o;
}
#ifndef CORE_H_
#define CORE_H_
#include "api.h"
#include "endian.h"
#include "permutations.h"
// Values of the `mode` argument: absorb associated data only, encrypt, decrypt.
#define ASCON_AD 0
#define ASCON_ENC 1
#define ASCON_DEC 2
// Bytes absorbed per permutation call (two 64-bit state words).
#define RATE (128 / 8)
// Round counts passed to P(): PA for initialization and finalization,
// PB between data blocks.
#define PA_ROUNDS 12
#define PB_ROUNDS 8
// Initial value of x0: key size in bits, rate in bits, PA_ROUNDS and PB_ROUNDS
// packed into the four most significant bytes; the low 32 bits are zero.
#define IV \
((u64)(8 * (CRYPTO_KEYBYTES)) << 56 | (u64)(8 * (RATE)) << 48 | \
(u64)(PA_ROUNDS) << 40 | (u64)(PB_ROUNDS) << 32)
// Absorbs `len` bytes of `in` into the state; writes `len` bytes to `out`
// unless mode is ASCON_AD.
void process_data(state* s, unsigned char* out, const unsigned char* in,
unsigned long long len, u8 mode);
// Runs initialization, associated data, message processing and finalization;
// the callers read the tag from s->x3 and s->x4 afterwards.
void ascon_core(state* s, unsigned char* out, const unsigned char* in,
unsigned long long tlen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k, u8 mode);
#endif // CORE_H_
#include "core.h"
int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen,
unsigned char* nsec, const unsigned char* c,
unsigned long long clen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k) {
if (clen < CRYPTO_ABYTES) {
*mlen = 0;
return -1;
}
state s;
u32_2 t0, t1;
(void)nsec;
// set plaintext size
*mlen = clen - CRYPTO_ABYTES;
ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DEC);
// verify tag (should be constant time, check compiler output)
t0 = to_bit_interleaving(U64BIG(*(u64*)(c + *mlen)));
t1 = to_bit_interleaving(U64BIG(*(u64*)(c + *mlen + 8)));
if (((s.x3.e ^ t0.e) | (s.x3.o ^ t0.o) | (s.x4.e ^ t1.e) | (s.x4.o ^ t1.o)) !=
0) {
*mlen = 0;
return -1;
}
return 0;
}
#include "core.h"
/* Writes w to p as eight big-endian bytes, byte by byte: the tag starts at
 * c + mlen, which is not 8-byte aligned in general. */
static void tag_store64(unsigned char* p, u64 w) {
  int j;
  for (j = 0; j < 8; ++j) p[j] = EXT_BYTE64(w, j);
}

/*
 * Encrypts m (mlen bytes) with associated data ad, nonce npub and key k.
 * Writes mlen ciphertext bytes followed by the CRYPTO_ABYTES-byte tag to c
 * and sets *clen = mlen + CRYPTO_ABYTES. nsec is unused. Always returns 0.
 */
int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen,
                        const unsigned char* m, unsigned long long mlen,
                        const unsigned char* ad, unsigned long long adlen,
                        const unsigned char* nsec, const unsigned char* npub,
                        const unsigned char* k) {
  state s;
  (void)nsec;

  // set ciphertext size
  *clen = mlen + CRYPTO_ABYTES;
  ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENC);

  // set tag: x3 and x4 converted back from the bit-interleaved form
  tag_store64(c + mlen, from_bit_interleaving(s.x3));
  tag_store64(c + mlen + 8, from_bit_interleaving(s.x4));
  return 0;
}
#ifndef ENDIAN_H_
#define ENDIAN_H_
// UnnBIG(x) converts between host byte order and big-endian for an nn-bit
// value. The conversion is its own inverse, so the same macro is used for
// loads and stores.
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
// macros for big endian machines
#define U64BIG(x) (x)
#define U32BIG(x) (x)
#define U16BIG(x) (x)
#elif defined(_MSC_VER) || \
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
// macros for little endian machines
// (MSVC builds are treated as little endian without a further check.)
// These byte swaps evaluate x several times: pass side-effect-free arguments.
#define U64BIG(x) \
((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) | \
(((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) | \
(((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) | \
(((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56))
#define U32BIG(x) \
((((x)&0x000000FF) << 24) | (((x)&0x0000FF00) << 8) | \
(((x)&0x00FF0000) >> 8) | (((x)&0xFF000000) >> 24))
#define U16BIG(x) ((((x)&0x00FF) << 8) | (((x)&0xFF00) >> 8))
#else
// Neither byte order could be detected at compile time: refuse to build.
#error "ascon byte order macros not defined in endian.h"
#endif
#endif // ENDIAN_H_
#include "permutations.h"
// Round constants for the 12 rounds, already split for the bit-interleaved
// state: entry i is {C_e, C_o}, which ROUND() XORs into x2.e and x2.o.
// P() indexes this table with the round number 0..11.
static const u8 constants[][2] = {
{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9}, {0x6, 0xc}, {0x3, 0xc},
{0x6, 0x9}, {0x3, 0x9}, {0xc, 0x6}, {0x9, 0x6}, {0xc, 0x3}, {0x9, 0x3}};
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
// Splits a 64-bit word into its even-indexed bits (.e) and odd-indexed bits
// (.o). Each 32-bit half is first rearranged so that its even bits sit in the
// low 16 bits and its odd bits in the high 16 bits; the halves are then merged.
u32_2 to_bit_interleaving(u64 in) {
  u32 upper = (in) >> 32;
  u32 lower = (u32)(in);
  u32 swap;
  u32_2 result;

  // low half: exchange bit groups of width 1, 2, 4 and 8
  swap = (lower ^ (lower >> 1)) & 0x22222222;
  lower ^= swap ^ (swap << 1);
  swap = (lower ^ (lower >> 2)) & 0x0C0C0C0C;
  lower ^= swap ^ (swap << 2);
  swap = (lower ^ (lower >> 4)) & 0x00F000F0;
  lower ^= swap ^ (swap << 4);
  swap = (lower ^ (lower >> 8)) & 0x0000FF00;
  lower ^= swap ^ (swap << 8);

  // high half: same four steps
  swap = (upper ^ (upper >> 1)) & 0x22222222;
  upper ^= swap ^ (swap << 1);
  swap = (upper ^ (upper >> 2)) & 0x0C0C0C0C;
  upper ^= swap ^ (swap << 2);
  swap = (upper ^ (upper >> 4)) & 0x00F000F0;
  upper ^= swap ^ (swap << 4);
  swap = (upper ^ (upper >> 8)) & 0x0000FF00;
  upper ^= swap ^ (swap << 8);

  result.e = (lower & 0x0000FFFF) | (upper << 16);
  result.o = (lower >> 16) | (upper & 0xFFFF0000);
  return result;
}
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
// Inverse of to_bit_interleaving(): merges the even-bit word in.e and the
// odd-bit word in.o back into one ordinary 64-bit word.
u64 from_bit_interleaving(u32_2 in) {
  u32 lower = (in.e & 0x0000FFFF) | (in.o << 16);
  u32 upper = (in.e >> 16) | (in.o & 0xFFFF0000);
  u32 swap;

  // low half: exchange bit groups of width 8, 4, 2 and 1
  swap = (lower ^ (lower >> 8)) & 0x0000FF00;
  lower ^= swap ^ (swap << 8);
  swap = (lower ^ (lower >> 4)) & 0x00F000F0;
  lower ^= swap ^ (swap << 4);
  swap = (lower ^ (lower >> 2)) & 0x0C0C0C0C;
  lower ^= swap ^ (swap << 2);
  swap = (lower ^ (lower >> 1)) & 0x22222222;
  lower ^= swap ^ (swap << 1);

  // high half: same four steps
  swap = (upper ^ (upper >> 8)) & 0x0000FF00;
  upper ^= swap ^ (swap << 8);
  swap = (upper ^ (upper >> 4)) & 0x00F000F0;
  upper ^= swap ^ (swap << 4);
  swap = (upper ^ (upper >> 2)) & 0x0C0C0C0C;
  upper ^= swap ^ (swap << 2);
  swap = (upper ^ (upper >> 1)) & 0x22222222;
  upper ^= swap ^ (swap << 1);

  return (u64)upper << 32 | lower;
}
// Applies the last `rounds` of the 12 permutation rounds to *p. The state is
// copied into the local `s` that ROUND() operates on and written back at the
// end; t0..t4 are scratch words used by ROUND().
void P(state *p, u8 rounds) {
  u32_2 t0, t1, t2, t3, t4;
  state s = *p;
  u32 round = START_ROUND(rounds);

  while (round < 12) {
    ROUND(constants[round][0], constants[round][1]);
    round++;
  }
  *p = s;
}
#ifndef PERMUTATIONS_H_
#define PERMUTATIONS_H_
// Short integer aliases.
// NOTE(review): u32 is `unsigned int`, assumed to be 32 bits wide -- confirm
// for the target compiler.
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned long long u64;
// One 64-bit state word in bit-interleaved form: `e` holds the even-indexed
// bits and `o` the odd-indexed bits, as produced by to_bit_interleaving().
typedef struct {
u32 e;
u32 o;
} u32_2;
// Permutation state: five bit-interleaved 64-bit words.
typedef struct {
u32_2 x0;
u32_2 x1;
u32_2 x2;
u32_2 x3;
u32_2 x4;
} state;
// EXT_BYTE64(x, n): byte n of the 64-bit value x, n = 0 being the most
// significant byte. INS_BYTE64(x, n): x moved into that byte position.
#define EXT_BYTE64(x, n) ((u8)((u64)(x) >> (8 * (7 - (n)))))
#define INS_BYTE64(x, n) ((u64)(x) << (8 * (7 - (n))))
// 32-bit rotate right; n must be in 1..31 (n == 0 would shift by 32).
#define ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
// Index of the first round to run for an x-round permutation.
#define START_ROUND(x) (12 - (x))
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
u32_2 to_bit_interleaving(u64 in);
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
u64 from_bit_interleaving(u32_2 in);
// ROUND(C_e, C_o): one permutation round on the bit-interleaved state.
// Expects a `state s` and scratch words `u32_2 t0..t4` in scope at the point
// of expansion. C_e and C_o are XORed into x2.e and x2.o; the linear layer is
// written with 32-bit rotations on the e/o halves.
/* clang-format off */
#define ROUND(C_e, C_o) \
do { \
/* round constant */ \
s.x2.e ^= C_e; s.x2.o ^= C_o; \
/* s-box layer */ \
s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \
s.x4.e ^= s.x3.e; s.x4.o ^= s.x3.o; \
s.x2.e ^= s.x1.e; s.x2.o ^= s.x1.o; \
t0.e = s.x0.e; t0.o = s.x0.o; \
t4.e = s.x4.e; t4.o = s.x4.o; \
t3.e = s.x3.e; t3.o = s.x3.o; \
t1.e = s.x1.e; t1.o = s.x1.o; \
t2.e = s.x2.e; t2.o = s.x2.o; \
s.x0.e = t0.e ^ (~t1.e & t2.e); s.x0.o = t0.o ^ (~t1.o & t2.o); \
s.x2.e = t2.e ^ (~t3.e & t4.e); s.x2.o = t2.o ^ (~t3.o & t4.o); \
s.x4.e = t4.e ^ (~t0.e & t1.e); s.x4.o = t4.o ^ (~t0.o & t1.o); \
s.x1.e = t1.e ^ (~t2.e & t3.e); s.x1.o = t1.o ^ (~t2.o & t3.o); \
s.x3.e = t3.e ^ (~t4.e & t0.e); s.x3.o = t3.o ^ (~t4.o & t0.o); \
s.x1.e ^= s.x0.e; s.x1.o ^= s.x0.o; \
s.x3.e ^= s.x2.e; s.x3.o ^= s.x2.o; \
s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \
/* linear layer */ \
t0.e = s.x0.e ^ ROTR32(s.x0.o, 4); t0.o = s.x0.o ^ ROTR32(s.x0.e, 5); \
t1.e = s.x1.e ^ ROTR32(s.x1.e, 11); t1.o = s.x1.o ^ ROTR32(s.x1.o, 11); \
t2.e = s.x2.e ^ ROTR32(s.x2.o, 2); t2.o = s.x2.o ^ ROTR32(s.x2.e, 3); \
t3.e = s.x3.e ^ ROTR32(s.x3.o, 3); t3.o = s.x3.o ^ ROTR32(s.x3.e, 4); \
t4.e = s.x4.e ^ ROTR32(s.x4.e, 17); t4.o = s.x4.o ^ ROTR32(s.x4.o, 17); \
s.x0.e ^= ROTR32(t0.o, 9); s.x0.o ^= ROTR32(t0.e, 10); \
s.x1.e ^= ROTR32(t1.o, 19); s.x1.o ^= ROTR32(t1.e, 20); \
s.x2.e ^= t2.o; s.x2.o ^= ROTR32(t2.e, 1); \
s.x3.e ^= ROTR32(t3.e, 5); s.x3.o ^= ROTR32(t3.o, 5); \
s.x4.e ^= ROTR32(t4.o, 3); s.x4.o ^= ROTR32(t4.e, 4); \
s.x2.e = ~s.x2.e; s.x2.o = ~s.x2.o; \
} while(0)
/* clang-format on */
// Applies the last `rounds` of the 12 permutation rounds to *p in place.
void P(state *p, u8 rounds);
#endif // PERMUTATIONS_H_
// AEAD parameter set of this implementation; all sizes are in bytes.
// KEYBYTES: secret key, NSECBYTES: secret message number (unused, 0),
// NPUBBYTES: public nonce, ABYTES: tag appended to the ciphertext.
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
// NOTE(review): presumably tells the harness that input and output buffers
// must not overlap -- confirm against the benchmarking framework.
#define CRYPTO_NOOVERLAP 1
#include "core.h"
void process_data(state* s, unsigned char* out, const unsigned char* in,
unsigned long long len, u8 mode) {
u64* x;
u64 i;
while (len >= RATE) {
s->x0 ^= U64BIG(*(u64*)in);
s->x1 ^= U64BIG(*(u64*)(in + 8));
if (mode != ASCON_AD) {
*(u64*)out = U64BIG(s->x0);
*(u64*)(out + 8) = U64BIG(s->x1);
}
if (mode == ASCON_DEC) {
s->x0 = U64BIG(*((u64*)in));
s->x1 = U64BIG(*((u64*)(in + 8)));
}
P(s, PB_ROUNDS);
in += RATE;
out += RATE;
len -= RATE;
}
for (i = 0; i < len; ++i, ++out, ++in) {
if (i < 8)
x = &(s->x0);
else
x = &(s->x1);
*x ^= INS_BYTE64(*in, i % 8);
if (mode != ASCON_AD) *out = EXT_BYTE64(*x, i % 8);
if (mode == ASCON_DEC) {
*x &= ~INS_BYTE64(0xff, i % 8);
*x |= INS_BYTE64(*in, i % 8);
}
}
if (len < 8)
s->x0 ^= INS_BYTE64(0x80, len);
else
s->x1 ^= INS_BYTE64(0x80, len % 8);
}
void ascon_core(state* s, unsigned char* out, const unsigned char* in,
unsigned long long tlen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k, u8 mode) {
const u64 K0 = U64BIG(*(u64*)k);
const u64 K1 = U64BIG(*(u64*)(k + 8));
const u64 N0 = U64BIG(*(u64*)npub);
const u64 N1 = U64BIG(*(u64*)(npub + 8));
// initialization
s->x0 = IV;
s->x1 = K0;
s->x2 = K1;
s->x3 = N0;
s->x4 = N1;
P(s, PA_ROUNDS);
s->x3 ^= K0;
s->x4 ^= K1;
// process associated data
if (adlen) {
process_data(s, (void*)0, ad, adlen, ASCON_AD);
P(s, PB_ROUNDS);
}
s->x4 ^= 1;
// process plaintext/ciphertext
process_data(s, out, in, tlen, mode);
// finalization
s->x2 ^= K0;
s->x3 ^= K1;
P(s, PA_ROUNDS);
s->x3 ^= K0;
s->x4 ^= K1;
}
#ifndef CORE_H_
#define CORE_H_
#include "api.h"
#include "endian.h"
#include "permutations.h"
// Values of the `mode` argument: absorb associated data only, encrypt, decrypt.
#define ASCON_AD 0
#define ASCON_ENC 1
#define ASCON_DEC 2
// Bytes absorbed per permutation call (two 64-bit state words).
#define RATE (128 / 8)
// Round counts passed to P(): PA for initialization and finalization,
// PB between data blocks.
#define PA_ROUNDS 12
#define PB_ROUNDS 8
// Initial value of x0: key size in bits, rate in bits, PA_ROUNDS and PB_ROUNDS
// packed into the four most significant bytes; the low 32 bits are zero.
#define IV \
((u64)(8 * (CRYPTO_KEYBYTES)) << 56 | (u64)(8 * (RATE)) << 48 | \
(u64)(PA_ROUNDS) << 40 | (u64)(PB_ROUNDS) << 32)
// Absorbs `len` bytes of `in` into the state; writes `len` bytes to `out`
// unless mode is ASCON_AD.
void process_data(state* s, unsigned char* out, const unsigned char* in,
unsigned long long len, u8 mode);
// Runs initialization, associated data, message processing and finalization;
// the callers read the tag from s->x3 and s->x4 afterwards.
void ascon_core(state* s, unsigned char* out, const unsigned char* in,
unsigned long long tlen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k, u8 mode);
#endif // CORE_H_
#include "core.h"
int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen,
unsigned char* nsec, const unsigned char* c,
unsigned long long clen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k) {
if (clen < CRYPTO_ABYTES) {
*mlen = 0;
return -1;
}
state s;
(void)nsec;
// set plaintext size
*mlen = clen - CRYPTO_ABYTES;
ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DEC);
// verify tag (should be constant time, check compiler output)
if (((s.x3 ^ U64BIG(*(u64*)(c + *mlen))) |
(s.x4 ^ U64BIG(*(u64*)(c + *mlen + 8)))) != 0) {
*mlen = 0;
return -1;
}
return 0;
}
#include "core.h"
int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen,
const unsigned char* m, unsigned long long mlen,
const unsigned char* ad, unsigned long long adlen,
const unsigned char* nsec, const unsigned char* npub,
const unsigned char* k) {
state s;
(void)nsec;
// set ciphertext size
*clen = mlen + CRYPTO_ABYTES;
ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENC);
// set tag
*(u64*)(c + mlen) = U64BIG(s.x3);
*(u64*)(c + mlen + 8) = U64BIG(s.x4);
return 0;
}
#ifndef ENDIAN_H_
#define ENDIAN_H_
// UnnBIG(x) converts between host byte order and big-endian for an nn-bit
// value. The conversion is its own inverse, so the same macro is used for
// loads and stores.
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
// macros for big endian machines
#define U64BIG(x) (x)
#define U32BIG(x) (x)
#define U16BIG(x) (x)
#elif defined(_MSC_VER) || \
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
// macros for little endian machines
// (MSVC builds are treated as little endian without a further check.)
// These byte swaps evaluate x several times: pass side-effect-free arguments.
#define U64BIG(x) \
((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) | \
(((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) | \
(((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) | \
(((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56))
#define U32BIG(x) \
((((x)&0x000000FF) << 24) | (((x)&0x0000FF00) << 8) | \
(((x)&0x00FF0000) >> 8) | (((x)&0xFF000000) >> 24))
#define U16BIG(x) ((((x)&0x00FF) << 8) | (((x)&0xFF00) >> 8))
#else
// Neither byte order could be detected at compile time: refuse to build.
#error "ascon byte order macros not defined in endian.h"
#endif
#endif // ENDIAN_H_
#include "permutations.h"
// Applies the last `rounds` permutation rounds to *p. The round constant
// starts at START_CONSTANT(rounds) and drops by 0x0f per round; the loop ends
// once it is no longer above 0x4a, i.e. after the round with constant 0x4b.
// ROUND() works on the local copy `s`, which is written back at the end.
void P(state *p, u8 rounds) {
  state s = *p;
  u8 rc = START_CONSTANT(rounds);

  while (rc > 0x4a) {
    ROUND(rc);
    rc -= 0x0f;
  }
  *p = s;
}
#ifndef PERMUTATIONS_H_
#define PERMUTATIONS_H_
// Short integer aliases.
typedef unsigned char u8;
typedef unsigned long long u64;
// Permutation state: five 64-bit words.
typedef struct {
u64 x0, x1, x2, x3, x4;
} state;
// EXT_BYTE64(x, n): byte n of the 64-bit value x, n = 0 being the most
// significant byte. INS_BYTE64(x, n): x moved into that byte position.
#define EXT_BYTE64(x, n) ((u8)((u64)(x) >> (8 * (7 - (n)))))
#define INS_BYTE64(x, n) ((u64)(x) << (8 * (7 - (n))))
// 64-bit rotate right; n must be in 1..63 (n == 0 would shift by 64).
#define ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n))))
// Round constant of the first round of an x-round permutation: with
// r = 12 - x, the high nibble is 0xf - r and the low nibble is r.
#define START_CONSTANT(x) (((0xf - (12 - (x))) << 4) | (12 - (x)))
// ROUND(C): one permutation round with round constant C. Expects a `state s`
// in scope at the point of expansion; `t` is scratch space local to the macro.
// Each word's linear step x ^ ROTR(x, a) ^ ROTR(x, b) is computed with two
// chained rotations (by a, then by b - a), interleaved across the five words.
#define ROUND(C) \
do { \
state t; \
s.x2 ^= C; \
s.x0 ^= s.x4; \
s.x4 ^= s.x3; \
s.x2 ^= s.x1; \
t.x0 = s.x0; \
t.x4 = s.x4; \
t.x3 = s.x3; \
t.x1 = s.x1; \
t.x2 = s.x2; \
s.x0 = t.x0 ^ ((~t.x1) & t.x2); \
s.x2 = t.x2 ^ ((~t.x3) & t.x4); \
s.x4 = t.x4 ^ ((~t.x0) & t.x1); \
s.x1 = t.x1 ^ ((~t.x2) & t.x3); \
s.x3 = t.x3 ^ ((~t.x4) & t.x0); \
s.x1 ^= s.x0; \
t.x1 = s.x1; \
s.x1 = ROTR64(s.x1, 39); \
s.x3 ^= s.x2; \
t.x2 = s.x2; \
s.x2 = ROTR64(s.x2, 1); \
t.x4 = s.x4; \
t.x2 ^= s.x2; \
s.x2 = ROTR64(s.x2, 6 - 1); \
t.x3 = s.x3; \
t.x1 ^= s.x1; \
s.x3 = ROTR64(s.x3, 10); \
s.x0 ^= s.x4; \
s.x4 = ROTR64(s.x4, 7); \
t.x3 ^= s.x3; \
s.x2 ^= t.x2; \
s.x1 = ROTR64(s.x1, 61 - 39); \
t.x0 = s.x0; \
s.x2 = ~s.x2; \
s.x3 = ROTR64(s.x3, 17 - 10); \
t.x4 ^= s.x4; \
s.x4 = ROTR64(s.x4, 41 - 7); \
s.x3 ^= t.x3; \
s.x1 ^= t.x1; \
s.x0 = ROTR64(s.x0, 19); \
s.x4 ^= t.x4; \
t.x0 ^= s.x0; \
s.x0 = ROTR64(s.x0, 28 - 19); \
s.x0 ^= t.x0; \
} while (0)
// Applies the last `rounds` permutation rounds to *p in place.
void P(state *p, u8 rounds);
#endif // PERMUTATIONS_H_
// AEAD parameter set of this implementation; all sizes are in bytes.
// KEYBYTES: secret key, NSECBYTES: secret message number (unused, 0),
// NPUBBYTES: public nonce, ABYTES: tag appended to the ciphertext.
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
// NOTE(review): presumably tells the harness that input and output buffers
// must not overlap -- confirm against the benchmarking framework.
#define CRYPTO_NOOVERLAP 1
#include "core.h"
void process_data(state* s, unsigned char* out, const unsigned char* in,
unsigned long long len, u8 mode) {
u32_2 t0;
u64 tmp0;
u64 i;
while (len >= RATE) {
tmp0 = U64BIG(*(u64*)in);
t0 = to_bit_interleaving(tmp0);
s->x0.e ^= t0.e;
s->x0.o ^= t0.o;
if (mode != ASCON_AD) {
tmp0 = from_bit_interleaving(s->x0);
*(u64*)out = U64BIG(tmp0);
}
if (mode == ASCON_DEC) s->x0 = t0;
P(s, PB_ROUNDS);
in += RATE;
out += RATE;
len -= RATE;
}
tmp0 = 0;
for (i = 0; i < len; ++i, ++in) tmp0 |= INS_BYTE64(*in, i);
in -= len;
tmp0 |= INS_BYTE64(0x80, len);
t0 = to_bit_interleaving(tmp0);
s->x0.e ^= t0.e;
s->x0.o ^= t0.o;
if (mode != ASCON_AD) {
tmp0 = from_bit_interleaving(s->x0);
for (i = 0; i < len; ++i, ++out) *out = EXT_BYTE64(tmp0, i);
}
if (mode == ASCON_DEC) {
for (i = 0; i < len; ++i, ++in) {
tmp0 &= ~INS_BYTE64(0xff, i);
tmp0 |= INS_BYTE64(*in, i);
}
s->x0 = to_bit_interleaving(tmp0);
}
}
void ascon_core(state* s, unsigned char* out, const unsigned char* in,
unsigned long long tlen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k, u8 mode) {
u32_2 K0, K1, N0, N1;
// load key and nonce
K0 = to_bit_interleaving(U64BIG(*(u64*)k));
K1 = to_bit_interleaving(U64BIG(*(u64*)(k + 8)));
N0 = to_bit_interleaving(U64BIG(*(u64*)npub));
N1 = to_bit_interleaving(U64BIG(*(u64*)(npub + 8)));
// initialization
s->x0 = to_bit_interleaving(IV);
s->x1.o = K0.o;
s->x1.e = K0.e;
s->x2.e = K1.e;
s->x2.o = K1.o;
s->x3.e = N0.e;
s->x3.o = N0.o;
s->x4.e = N1.e;
s->x4.o = N1.o;
P(s, PA_ROUNDS);
s->x3.e ^= K0.e;
s->x3.o ^= K0.o;
s->x4.e ^= K1.e;
s->x4.o ^= K1.o;
// process associated data
if (adlen) {
process_data(s, (void*)0, ad, adlen, ASCON_AD);
P(s, PB_ROUNDS);
}
s->x4.e ^= 1;
// process plaintext/ciphertext
process_data(s, out, in, tlen, mode);
// finalization
s->x1.e ^= K0.e;
s->x1.o ^= K0.o;
s->x2.e ^= K1.e;
s->x2.o ^= K1.o;
P(s, PA_ROUNDS);
s->x3.e ^= K0.e;
s->x3.o ^= K0.o;
s->x4.e ^= K1.e;
s->x4.o ^= K1.o;
}
#ifndef CORE_H_
#define CORE_H_
#include "api.h"
#include "endian.h"
#include "permutations.h"
// Values of the `mode` argument: absorb associated data only, encrypt, decrypt.
#define ASCON_AD 0
#define ASCON_ENC 1
#define ASCON_DEC 2
// Bytes absorbed per permutation call (one 64-bit state word).
#define RATE (64 / 8)
// Round counts passed to P(): PA for initialization and finalization,
// PB between data blocks.
#define PA_ROUNDS 12
#define PB_ROUNDS 6
// Initial value of x0: key size in bits, rate in bits, PA_ROUNDS and PB_ROUNDS
// packed into the four most significant bytes; the low 32 bits are zero.
#define IV \
((u64)(8 * (CRYPTO_KEYBYTES)) << 56 | (u64)(8 * (RATE)) << 48 | \
(u64)(PA_ROUNDS) << 40 | (u64)(PB_ROUNDS) << 32)
// Absorbs `len` bytes of `in` into the state; writes `len` bytes to `out`
// unless mode is ASCON_AD.
void process_data(state* s, unsigned char* out, const unsigned char* in,
unsigned long long len, u8 mode);
// Runs initialization, associated data, message processing and finalization;
// the callers read the tag from s->x3 and s->x4 afterwards.
void ascon_core(state* s, unsigned char* out, const unsigned char* in,
unsigned long long tlen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k, u8 mode);
#endif // CORE_H_
#include "core.h"
int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen,
unsigned char* nsec, const unsigned char* c,
unsigned long long clen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k) {
if (clen < CRYPTO_ABYTES) {
*mlen = 0;
return -1;
}
state s;
u32_2 t0, t1;
(void)nsec;
// set plaintext size
*mlen = clen - CRYPTO_ABYTES;
ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DEC);
// verify tag (should be constant time, check compiler output)
t0 = to_bit_interleaving(U64BIG(*(u64*)(c + *mlen)));
t1 = to_bit_interleaving(U64BIG(*(u64*)(c + *mlen + 8)));
if (((s.x3.e ^ t0.e) | (s.x3.o ^ t0.o) | (s.x4.e ^ t1.e) | (s.x4.o ^ t1.o)) !=
0) {
*mlen = 0;
return -1;
}
return 0;
}
#include "core.h"
/* Writes w to p as eight big-endian bytes, byte by byte: the tag starts at
 * c + mlen, which is not 8-byte aligned in general. */
static void tag_store64(unsigned char* p, u64 w) {
  int j;
  for (j = 0; j < 8; ++j) p[j] = EXT_BYTE64(w, j);
}

/*
 * Encrypts m (mlen bytes) with associated data ad, nonce npub and key k.
 * Writes mlen ciphertext bytes followed by the CRYPTO_ABYTES-byte tag to c
 * and sets *clen = mlen + CRYPTO_ABYTES. nsec is unused. Always returns 0.
 */
int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen,
                        const unsigned char* m, unsigned long long mlen,
                        const unsigned char* ad, unsigned long long adlen,
                        const unsigned char* nsec, const unsigned char* npub,
                        const unsigned char* k) {
  state s;
  (void)nsec;

  // set ciphertext size
  *clen = mlen + CRYPTO_ABYTES;
  ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENC);

  // set tag: x3 and x4 converted back from the bit-interleaved form
  tag_store64(c + mlen, from_bit_interleaving(s.x3));
  tag_store64(c + mlen + 8, from_bit_interleaving(s.x4));
  return 0;
}
#ifndef ENDIAN_H_
#define ENDIAN_H_
// UnnBIG(x) converts between host byte order and big-endian for an nn-bit
// value. The conversion is its own inverse, so the same macro is used for
// loads and stores.
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
// macros for big endian machines
#define U64BIG(x) (x)
#define U32BIG(x) (x)
#define U16BIG(x) (x)
#elif defined(_MSC_VER) || \
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
// macros for little endian machines
// (MSVC builds are treated as little endian without a further check.)
// These byte swaps evaluate x several times: pass side-effect-free arguments.
#define U64BIG(x) \
((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) | \
(((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) | \
(((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) | \
(((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56))
#define U32BIG(x) \
((((x)&0x000000FF) << 24) | (((x)&0x0000FF00) << 8) | \
(((x)&0x00FF0000) >> 8) | (((x)&0xFF000000) >> 24))
#define U16BIG(x) ((((x)&0x00FF) << 8) | (((x)&0xFF00) >> 8))
#else
// Neither byte order could be detected at compile time: refuse to build.
#error "ascon byte order macros not defined in endian.h"
#endif
#endif // ENDIAN_H_
#include "permutations.h"
// Round constants for the 12 rounds, already split for the bit-interleaved
// state: entry i is {C_e, C_o}, which ROUND() XORs into x2.e and x2.o.
// P() indexes this table with the round number 0..11.
static const u8 constants[][2] = {
{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9}, {0x6, 0xc}, {0x3, 0xc},
{0x6, 0x9}, {0x3, 0x9}, {0xc, 0x6}, {0x9, 0x6}, {0xc, 0x3}, {0x9, 0x3}};
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
// Splits a 64-bit word into its even-indexed bits (.e) and odd-indexed bits
// (.o). Each 32-bit half is first rearranged so that its even bits sit in the
// low 16 bits and its odd bits in the high 16 bits; the halves are then merged.
u32_2 to_bit_interleaving(u64 in) {
  u32 upper = (in) >> 32;
  u32 lower = (u32)(in);
  u32 swap;
  u32_2 result;

  // low half: exchange bit groups of width 1, 2, 4 and 8
  swap = (lower ^ (lower >> 1)) & 0x22222222;
  lower ^= swap ^ (swap << 1);
  swap = (lower ^ (lower >> 2)) & 0x0C0C0C0C;
  lower ^= swap ^ (swap << 2);
  swap = (lower ^ (lower >> 4)) & 0x00F000F0;
  lower ^= swap ^ (swap << 4);
  swap = (lower ^ (lower >> 8)) & 0x0000FF00;
  lower ^= swap ^ (swap << 8);

  // high half: same four steps
  swap = (upper ^ (upper >> 1)) & 0x22222222;
  upper ^= swap ^ (swap << 1);
  swap = (upper ^ (upper >> 2)) & 0x0C0C0C0C;
  upper ^= swap ^ (swap << 2);
  swap = (upper ^ (upper >> 4)) & 0x00F000F0;
  upper ^= swap ^ (swap << 4);
  swap = (upper ^ (upper >> 8)) & 0x0000FF00;
  upper ^= swap ^ (swap << 8);

  result.e = (lower & 0x0000FFFF) | (upper << 16);
  result.o = (lower >> 16) | (upper & 0xFFFF0000);
  return result;
}
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
// Inverse of to_bit_interleaving(): merges the even-bit word in.e and the
// odd-bit word in.o back into one ordinary 64-bit word.
u64 from_bit_interleaving(u32_2 in) {
  u32 lower = (in.e & 0x0000FFFF) | (in.o << 16);
  u32 upper = (in.e >> 16) | (in.o & 0xFFFF0000);
  u32 swap;

  // low half: exchange bit groups of width 8, 4, 2 and 1
  swap = (lower ^ (lower >> 8)) & 0x0000FF00;
  lower ^= swap ^ (swap << 8);
  swap = (lower ^ (lower >> 4)) & 0x00F000F0;
  lower ^= swap ^ (swap << 4);
  swap = (lower ^ (lower >> 2)) & 0x0C0C0C0C;
  lower ^= swap ^ (swap << 2);
  swap = (lower ^ (lower >> 1)) & 0x22222222;
  lower ^= swap ^ (swap << 1);

  // high half: same four steps
  swap = (upper ^ (upper >> 8)) & 0x0000FF00;
  upper ^= swap ^ (swap << 8);
  swap = (upper ^ (upper >> 4)) & 0x00F000F0;
  upper ^= swap ^ (swap << 4);
  swap = (upper ^ (upper >> 2)) & 0x0C0C0C0C;
  upper ^= swap ^ (swap << 2);
  swap = (upper ^ (upper >> 1)) & 0x22222222;
  upper ^= swap ^ (swap << 1);

  return (u64)upper << 32 | lower;
}
// Applies the last `rounds` of the 12 permutation rounds to *p. The state is
// copied into the local `s` that ROUND() operates on and written back at the
// end; t0..t4 are scratch words used by ROUND().
void P(state *p, u8 rounds) {
  u32_2 t0, t1, t2, t3, t4;
  state s = *p;
  u32 round = START_ROUND(rounds);

  while (round < 12) {
    ROUND(constants[round][0], constants[round][1]);
    round++;
  }
  *p = s;
}
#ifndef PERMUTATIONS_H_
#define PERMUTATIONS_H_
// Short integer aliases.
// NOTE(review): u32 is `unsigned int`, assumed to be 32 bits wide -- confirm
// for the target compiler.
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned long long u64;
// One 64-bit state word in bit-interleaved form: `e` holds the even-indexed
// bits and `o` the odd-indexed bits, as produced by to_bit_interleaving().
typedef struct {
u32 e;
u32 o;
} u32_2;
// Permutation state: five bit-interleaved 64-bit words.
typedef struct {
u32_2 x0;
u32_2 x1;
u32_2 x2;
u32_2 x3;
u32_2 x4;
} state;
// EXT_BYTE64(x, n): byte n of the 64-bit value x, n = 0 being the most
// significant byte. INS_BYTE64(x, n): x moved into that byte position.
#define EXT_BYTE64(x, n) ((u8)((u64)(x) >> (8 * (7 - (n)))))
#define INS_BYTE64(x, n) ((u64)(x) << (8 * (7 - (n))))
// 32-bit rotate right; n must be in 1..31 (n == 0 would shift by 32).
#define ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
// Index of the first round to run for an x-round permutation.
#define START_ROUND(x) (12 - (x))
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
u32_2 to_bit_interleaving(u64 in);
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
u64 from_bit_interleaving(u32_2 in);
// ROUND(C_e, C_o): one permutation round on the bit-interleaved state.
// Expects a `state s` and scratch words `u32_2 t0..t4` in scope at the point
// of expansion. C_e and C_o are XORed into x2.e and x2.o; the linear layer is
// written with 32-bit rotations on the e/o halves.
/* clang-format off */
#define ROUND(C_e, C_o) \
do { \
/* round constant */ \
s.x2.e ^= C_e; s.x2.o ^= C_o; \
/* s-box layer */ \
s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \
s.x4.e ^= s.x3.e; s.x4.o ^= s.x3.o; \
s.x2.e ^= s.x1.e; s.x2.o ^= s.x1.o; \
t0.e = s.x0.e; t0.o = s.x0.o; \
t4.e = s.x4.e; t4.o = s.x4.o; \
t3.e = s.x3.e; t3.o = s.x3.o; \
t1.e = s.x1.e; t1.o = s.x1.o; \
t2.e = s.x2.e; t2.o = s.x2.o; \
s.x0.e = t0.e ^ (~t1.e & t2.e); s.x0.o = t0.o ^ (~t1.o & t2.o); \
s.x2.e = t2.e ^ (~t3.e & t4.e); s.x2.o = t2.o ^ (~t3.o & t4.o); \
s.x4.e = t4.e ^ (~t0.e & t1.e); s.x4.o = t4.o ^ (~t0.o & t1.o); \
s.x1.e = t1.e ^ (~t2.e & t3.e); s.x1.o = t1.o ^ (~t2.o & t3.o); \
s.x3.e = t3.e ^ (~t4.e & t0.e); s.x3.o = t3.o ^ (~t4.o & t0.o); \
s.x1.e ^= s.x0.e; s.x1.o ^= s.x0.o; \
s.x3.e ^= s.x2.e; s.x3.o ^= s.x2.o; \
s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \
/* linear layer */ \
t0.e = s.x0.e ^ ROTR32(s.x0.o, 4); t0.o = s.x0.o ^ ROTR32(s.x0.e, 5); \
t1.e = s.x1.e ^ ROTR32(s.x1.e, 11); t1.o = s.x1.o ^ ROTR32(s.x1.o, 11); \
t2.e = s.x2.e ^ ROTR32(s.x2.o, 2); t2.o = s.x2.o ^ ROTR32(s.x2.e, 3); \
t3.e = s.x3.e ^ ROTR32(s.x3.o, 3); t3.o = s.x3.o ^ ROTR32(s.x3.e, 4); \
t4.e = s.x4.e ^ ROTR32(s.x4.e, 17); t4.o = s.x4.o ^ ROTR32(s.x4.o, 17); \
s.x0.e ^= ROTR32(t0.o, 9); s.x0.o ^= ROTR32(t0.e, 10); \
s.x1.e ^= ROTR32(t1.o, 19); s.x1.o ^= ROTR32(t1.e, 20); \
s.x2.e ^= t2.o; s.x2.o ^= ROTR32(t2.e, 1); \
s.x3.e ^= ROTR32(t3.e, 5); s.x3.o ^= ROTR32(t3.o, 5); \
s.x4.e ^= ROTR32(t4.o, 3); s.x4.o ^= ROTR32(t4.e, 4); \
s.x2.e = ~s.x2.e; s.x2.o = ~s.x2.o; \
} while(0)
/* clang-format on */
// Applies the last `rounds` of the 12 permutation rounds to *p in place.
void P(state *p, u8 rounds);
#endif // PERMUTATIONS_H_
// AEAD parameter set of this implementation; all sizes are in bytes.
// KEYBYTES: secret key, NSECBYTES: secret message number (unused, 0),
// NPUBBYTES: public nonce, ABYTES: tag appended to the ciphertext.
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
// NOTE(review): presumably tells the harness that input and output buffers
// must not overlap -- confirm against the benchmarking framework.
#define CRYPTO_NOOVERLAP 1
#include "core.h"
void process_data(state* s, unsigned char* out, const unsigned char* in,
unsigned long long len, u8 mode) {
u64 i;
while (len >= RATE) {
s->x0 ^= U64BIG(*(u64*)in);
if (mode != ASCON_AD) *(u64*)out = U64BIG(s->x0);
if (mode == ASCON_DEC) s->x0 = U64BIG(*((u64*)in));
P(s, PB_ROUNDS);
in += RATE;
out += RATE;
len -= RATE;
}
for (i = 0; i < len; ++i, ++out, ++in) {
s->x0 ^= INS_BYTE64(*in, i);
if (mode != ASCON_AD) *out = EXT_BYTE64(s->x0, i);
if (mode == ASCON_DEC) {
s->x0 &= ~INS_BYTE64(0xff, i);
s->x0 |= INS_BYTE64(*in, i);
}
}
s->x0 ^= INS_BYTE64(0x80, len);
}
void ascon_core(state* s, unsigned char* out, const unsigned char* in,
unsigned long long tlen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k, u8 mode) {
const u64 K0 = U64BIG(*(u64*)k);
const u64 K1 = U64BIG(*(u64*)(k + 8));
const u64 N0 = U64BIG(*(u64*)npub);
const u64 N1 = U64BIG(*(u64*)(npub + 8));
// initialization
s->x0 = IV;
s->x1 = K0;
s->x2 = K1;
s->x3 = N0;
s->x4 = N1;
P(s, PA_ROUNDS);
s->x3 ^= K0;
s->x4 ^= K1;
// process associated data
if (adlen) {
process_data(s, (void*)0, ad, adlen, ASCON_AD);
P(s, PB_ROUNDS);
}
s->x4 ^= 1;
// process plaintext/ciphertext
process_data(s, out, in, tlen, mode);
// finalization
s->x1 ^= K0;
s->x2 ^= K1;
P(s, PA_ROUNDS);
s->x3 ^= K0;
s->x4 ^= K1;
}
#ifndef CORE_H_
#define CORE_H_
#include "api.h"
#include "endian.h"
#include "permutations.h"
// Values of the `mode` argument: absorb associated data only, encrypt, decrypt.
#define ASCON_AD 0
#define ASCON_ENC 1
#define ASCON_DEC 2
// Bytes absorbed per permutation call (one 64-bit state word).
#define RATE (64 / 8)
// Round counts passed to P(): PA for initialization and finalization,
// PB between data blocks.
#define PA_ROUNDS 12
#define PB_ROUNDS 6
// Initial value of x0: key size in bits, rate in bits, PA_ROUNDS and PB_ROUNDS
// packed into the four most significant bytes; the low 32 bits are zero.
#define IV \
((u64)(8 * (CRYPTO_KEYBYTES)) << 56 | (u64)(8 * (RATE)) << 48 | \
(u64)(PA_ROUNDS) << 40 | (u64)(PB_ROUNDS) << 32)
// Absorbs `len` bytes of `in` into the state; writes `len` bytes to `out`
// unless mode is ASCON_AD.
void process_data(state* s, unsigned char* out, const unsigned char* in,
unsigned long long len, u8 mode);
// Runs initialization, associated data, message processing and finalization;
// the callers read the tag from s->x3 and s->x4 afterwards.
void ascon_core(state* s, unsigned char* out, const unsigned char* in,
unsigned long long tlen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k, u8 mode);
#endif // CORE_H_
#include "core.h"
int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen,
unsigned char* nsec, const unsigned char* c,
unsigned long long clen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k) {
if (clen < CRYPTO_ABYTES) {
*mlen = 0;
return -1;
}
state s;
(void)nsec;
// set plaintext size
*mlen = clen - CRYPTO_ABYTES;
ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DEC);
// verify tag (should be constant time, check compiler output)
if (((s.x3 ^ U64BIG(*(u64*)(c + *mlen))) |
(s.x4 ^ U64BIG(*(u64*)(c + *mlen + 8)))) != 0) {
*mlen = 0;
return -1;
}
return 0;
}
#include "core.h"
int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen,
const unsigned char* m, unsigned long long mlen,
const unsigned char* ad, unsigned long long adlen,
const unsigned char* nsec, const unsigned char* npub,
const unsigned char* k) {
state s;
(void)nsec;
// set ciphertext size
*clen = mlen + CRYPTO_ABYTES;
ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENC);
// set tag
*(u64*)(c + mlen) = U64BIG(s.x3);
*(u64*)(c + mlen + 8) = U64BIG(s.x4);
return 0;
}
#ifndef ENDIAN_H_
#define ENDIAN_H_
// UnnBIG(x) converts between host byte order and big-endian for an nn-bit
// value. The conversion is its own inverse, so the same macro is used for
// loads and stores.
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
// macros for big endian machines
#define U64BIG(x) (x)
#define U32BIG(x) (x)
#define U16BIG(x) (x)
#elif defined(_MSC_VER) || \
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
// macros for little endian machines
// (MSVC builds are treated as little endian without a further check.)
// These byte swaps evaluate x several times: pass side-effect-free arguments.
#define U64BIG(x) \
((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) | \
(((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) | \
(((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) | \
(((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56))
#define U32BIG(x) \
((((x)&0x000000FF) << 24) | (((x)&0x0000FF00) << 8) | \
(((x)&0x00FF0000) >> 8) | (((x)&0xFF000000) >> 24))
#define U16BIG(x) ((((x)&0x00FF) << 8) | (((x)&0xFF00) >> 8))
#else
// Neither byte order could be detected at compile time: refuse to build.
#error "ascon byte order macros not defined in endian.h"
#endif
#endif // ENDIAN_H_
#include "permutations.h"
// Applies the last `rounds` permutation rounds to *p. The round constant
// starts at START_CONSTANT(rounds) and drops by 0x0f per round; the loop ends
// once it is no longer above 0x4a, i.e. after the round with constant 0x4b.
// ROUND() works on the local copy `s`, which is written back at the end.
void P(state *p, u8 rounds) {
  state s = *p;
  u8 rc = START_CONSTANT(rounds);

  while (rc > 0x4a) {
    ROUND(rc);
    rc -= 0x0f;
  }
  *p = s;
}
#ifndef PERMUTATIONS_H_
#define PERMUTATIONS_H_
// Short integer aliases.
typedef unsigned char u8;
typedef unsigned long long u64;
// Permutation state: five 64-bit words.
typedef struct {
u64 x0, x1, x2, x3, x4;
} state;
// EXT_BYTE64(x, n): byte n of the 64-bit value x, n = 0 being the most
// significant byte. INS_BYTE64(x, n): x moved into that byte position.
#define EXT_BYTE64(x, n) ((u8)((u64)(x) >> (8 * (7 - (n)))))
#define INS_BYTE64(x, n) ((u64)(x) << (8 * (7 - (n))))
// 64-bit rotate right; n must be in 1..63 (n == 0 would shift by 64).
#define ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n))))
// Round constant of the first round of an x-round permutation: with
// r = 12 - x, the high nibble is 0xf - r and the low nibble is r.
#define START_CONSTANT(x) (((0xf - (12 - (x))) << 4) | (12 - (x)))
// ROUND(C): one permutation round with round constant C. Expects a `state s`
// in scope at the point of expansion; `t` is scratch space local to the macro.
// Each word's linear step x ^ ROTR(x, a) ^ ROTR(x, b) is computed with two
// chained rotations (by a, then by b - a), interleaved across the five words.
#define ROUND(C) \
do { \
state t; \
s.x2 ^= C; \
s.x0 ^= s.x4; \
s.x4 ^= s.x3; \
s.x2 ^= s.x1; \
t.x0 = s.x0; \
t.x4 = s.x4; \
t.x3 = s.x3; \
t.x1 = s.x1; \
t.x2 = s.x2; \
s.x0 = t.x0 ^ ((~t.x1) & t.x2); \
s.x2 = t.x2 ^ ((~t.x3) & t.x4); \
s.x4 = t.x4 ^ ((~t.x0) & t.x1); \
s.x1 = t.x1 ^ ((~t.x2) & t.x3); \
s.x3 = t.x3 ^ ((~t.x4) & t.x0); \
s.x1 ^= s.x0; \
t.x1 = s.x1; \
s.x1 = ROTR64(s.x1, 39); \
s.x3 ^= s.x2; \
t.x2 = s.x2; \
s.x2 = ROTR64(s.x2, 1); \
t.x4 = s.x4; \
t.x2 ^= s.x2; \
s.x2 = ROTR64(s.x2, 6 - 1); \
t.x3 = s.x3; \
t.x1 ^= s.x1; \
s.x3 = ROTR64(s.x3, 10); \
s.x0 ^= s.x4; \
s.x4 = ROTR64(s.x4, 7); \
t.x3 ^= s.x3; \
s.x2 ^= t.x2; \
s.x1 = ROTR64(s.x1, 61 - 39); \
t.x0 = s.x0; \
s.x2 = ~s.x2; \
s.x3 = ROTR64(s.x3, 17 - 10); \
t.x4 ^= s.x4; \
s.x4 = ROTR64(s.x4, 41 - 7); \
s.x3 ^= t.x3; \
s.x1 ^= t.x1; \
s.x0 = ROTR64(s.x0, 19); \
s.x4 ^= t.x4; \
t.x0 ^= s.x0; \
s.x0 = ROTR64(s.x0, 28 - 19); \
s.x0 ^= t.x0; \
} while (0)
// Applies the last `rounds` permutation rounds to *p in place.
void P(state *p, u8 rounds);
#endif // PERMUTATIONS_H_
// AEAD parameter set of this implementation; all sizes are in bytes.
// KEYBYTES: secret key (20 bytes = 160 bits in this variant), NSECBYTES:
// secret message number (unused, 0), NPUBBYTES: public nonce, ABYTES: tag
// appended to the ciphertext.
#define CRYPTO_KEYBYTES 20
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
// NOTE(review): presumably tells the harness that input and output buffers
// must not overlap -- confirm against the benchmarking framework.
#define CRYPTO_NOOVERLAP 1
#include "core.h"
void process_data(state* s, unsigned char* out, const unsigned char* in,
unsigned long long len, u8 mode) {
u64 i;
while (len >= RATE) {
s->x0 ^= U64BIG(*(u64*)in);
if (mode != ASCON_AD) *(u64*)out = U64BIG(s->x0);
if (mode == ASCON_DEC) s->x0 = U64BIG(*((u64*)in));
P(s, PB_ROUNDS);
in += RATE;
out += RATE;
len -= RATE;
}
for (i = 0; i < len; ++i, ++out, ++in) {
s->x0 ^= INS_BYTE64(*in, i);
if (mode != ASCON_AD) *out = EXT_BYTE64(s->x0, i);
if (mode == ASCON_DEC) {
s->x0 &= ~INS_BYTE64(0xff, i);
s->x0 |= INS_BYTE64(*in, i);
}
}
s->x0 ^= INS_BYTE64(0x80, len);
}
void ascon_core(state* s, unsigned char* out, const unsigned char* in,
unsigned long long tlen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k, u8 mode) {
const u64 K0 = U64BIG(*(u64*)(k + 0)) >> 32;
const u64 K1 = U64BIG(*(u64*)(k + 4));
const u64 K2 = U64BIG(*(u64*)(k + 12));
const u64 N0 = U64BIG(*(u64*)npub);
const u64 N1 = U64BIG(*(u64*)(npub + 8));
// initialization
s->x0 = IV | K0;
s->x1 = K1;
s->x2 = K2;
s->x3 = N0;
s->x4 = N1;
P(s, PA_ROUNDS);
s->x2 ^= K0;
s->x3 ^= K1;
s->x4 ^= K2;
// process associated data
if (adlen) {
process_data(s, (void*)0, ad, adlen, ASCON_AD);
P(s, PB_ROUNDS);
}
s->x4 ^= 1;
// process plaintext/ciphertext
process_data(s, out, in, tlen, mode);
// finalization
s->x1 ^= K0 << 32 | K1 >> 32;
s->x2 ^= K1 << 32 | K2 >> 32;
s->x3 ^= K2 << 32;
P(s, PA_ROUNDS);
s->x3 ^= K1;
s->x4 ^= K2;
}
#ifndef CORE_H_
#define CORE_H_
#include "api.h"
#include "endian.h"
#include "permutations.h"
// Values for the mode argument of process_data() / ascon_core().
// ASCON_AD: absorb input only, nothing is written to out.
#define ASCON_AD 0
// ASCON_ENC: absorb plaintext and write ciphertext to out.
#define ASCON_ENC 1
// ASCON_DEC: write plaintext to out and keep the ciphertext in the state.
#define ASCON_DEC 2
// Number of bytes absorbed per block (one 64-bit state word).
#define RATE (64 / 8)
// Rounds used by ascon_core() for initialization and finalization.
#define PA_ROUNDS 12
// Rounds used between data blocks.
#define PB_ROUNDS 6
// Initial value of state word x0. From the most significant byte down:
// key size in bits, rate in bits, PA_ROUNDS, PB_ROUNDS. The low 32 bits are
// zero; ascon_core() ORs the first four key bytes into them.
#define IV \
((u64)(8 * (CRYPTO_KEYBYTES)) << 56 | (u64)(8 * (RATE)) << 48 | \
(u64)(PA_ROUNDS) << 40 | (u64)(PB_ROUNDS) << 32)
// Absorbs len bytes of in into s; writes to out unless mode is ASCON_AD.
void process_data(state* s, unsigned char* out, const unsigned char* in,
unsigned long long len, u8 mode);
// Full AEAD core: initializes s from k and npub, processes ad and in, and
// leaves the tag in s->x3 / s->x4.
void ascon_core(state* s, unsigned char* out, const unsigned char* in,
unsigned long long tlen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k, u8 mode);
#endif // CORE_H_
#include "core.h"
int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen,
unsigned char* nsec, const unsigned char* c,
unsigned long long clen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k) {
if (clen < CRYPTO_ABYTES) {
*mlen = 0;
return -1;
}
state s;
(void)nsec;
// set plaintext size
*mlen = clen - CRYPTO_ABYTES;
ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DEC);
// verify tag (should be constant time, check compiler output)
if (((s.x3 ^ U64BIG(*(u64*)(c + *mlen))) |
(s.x4 ^ U64BIG(*(u64*)(c + *mlen + 8)))) != 0) {
*mlen = 0;
return -1;
}
return 0;
}
#include "core.h"
int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen,
const unsigned char* m, unsigned long long mlen,
const unsigned char* ad, unsigned long long adlen,
const unsigned char* nsec, const unsigned char* npub,
const unsigned char* k) {
state s;
(void)nsec;
// set ciphertext size
*clen = mlen + CRYPTO_ABYTES;
ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENC);
// set tag
*(u64*)(c + mlen) = U64BIG(s.x3);
*(u64*)(c + mlen + 8) = U64BIG(s.x4);
return 0;
}
#ifndef ENDIAN_H_
#define ENDIAN_H_
// U64BIG / U32BIG / U16BIG convert between the host representation of an
// integer and its big-endian representation (the conversion is its own
// inverse). On big-endian hosts they are the identity; on little-endian hosts
// they reverse the byte order.
//
// The little-endian variants evaluate x several times, so x must not have
// side effects. All mask constants are unsigned so that the shifts are done
// in an unsigned type even when x is a (promoted) signed int; with plain int
// masks, (x & 0xFF) << 24 would overflow int for byte values >= 0x80.
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
// macros for big endian machines
#define U64BIG(x) (x)
#define U32BIG(x) (x)
#define U16BIG(x) (x)
#elif defined(_MSC_VER) || \
    (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
// macros for little endian machines
#define U64BIG(x) \
  ((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) | \
   (((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) | \
   (((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) | \
   (((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56))
#define U32BIG(x) \
  ((((x)&0x000000FFU) << 24) | (((x)&0x0000FF00U) << 8) | \
   (((x)&0x00FF0000U) >> 8) | (((x)&0xFF000000U) >> 24))
#define U16BIG(x) ((((x)&0x00FFU) << 8) | (((x)&0xFF00U) >> 8))
#else
#error "ascon byte order macros not defined in endian.h"
#endif
#endif // ENDIAN_H_
#include "permutations.h"
// Runs the round function on *p.
//
// The state is copied into the local `s` (the name ROUND() operates on),
// transformed, and copied back. The round constant starts at
// START_CONSTANT(rounds) and drops by 0x0f per round; the last constant used
// is 0x4b, so rounds == 12 runs 0xf0..0x4b and rounds == 6 runs 0x96..0x4b.
void P(state *p, u8 rounds) {
  state s;
  u8 rc;

  s = *p;
  rc = START_CONSTANT(rounds);
  while (rc > 0x4a) {
    ROUND(rc);
    rc -= 0x0f;
  }
  *p = s;
}
#ifndef PERMUTATIONS_H_
#define PERMUTATIONS_H_
typedef unsigned char u8;
typedef unsigned long long u64;
// Permutation state: five 64-bit words.
typedef struct {
u64 x0, x1, x2, x3, x4;
} state;
// Byte n of the 64-bit word x, counted from the most significant byte
// (n == 0 is bits 63..56, n == 7 is bits 7..0).
#define EXT_BYTE64(x, n) ((u8)((u64)(x) >> (8 * (7 - (n)))))
// The value x moved to byte position n of a 64-bit word, same numbering as
// EXT_BYTE64.
#define INS_BYTE64(x, n) ((u64)(x) << (8 * (7 - (n))))
// Rotates the 64-bit value x right by n bits. n must be in 1..63: n == 0
// would shift left by 64, which is undefined.
#define ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n))))
// First round constant for a run of x rounds: high nibble 0xf - (12 - x), low
// nibble 12 - x. Gives 0xf0 for x == 12 and 0x96 for x == 6.
#define START_CONSTANT(x) (((0xf - (12 - (x))) << 4) | (12 - (x)))
#define ROUND(C) \
do { \
state t; \
s.x2 ^= C; \
s.x0 ^= s.x4; \
s.x4 ^= s.x3; \
s.x2 ^= s.x1; \
t.x0 = s.x0; \
t.x4 = s.x4; \
t.x3 = s.x3; \
t.x1 = s.x1; \
t.x2 = s.x2; \
s.x0 = t.x0 ^ ((~t.x1) & t.x2); \
s.x2 = t.x2 ^ ((~t.x3) & t.x4); \
s.x4 = t.x4 ^ ((~t.x0) & t.x1); \
s.x1 = t.x1 ^ ((~t.x2) & t.x3); \
s.x3 = t.x3 ^ ((~t.x4) & t.x0); \
s.x1 ^= s.x0; \
t.x1 = s.x1; \
s.x1 = ROTR64(s.x1, 39); \
s.x3 ^= s.x2; \
t.x2 = s.x2; \
s.x2 = ROTR64(s.x2, 1); \
t.x4 = s.x4; \
t.x2 ^= s.x2; \
s.x2 = ROTR64(s.x2, 6 - 1); \
t.x3 = s.x3; \
t.x1 ^= s.x1; \
s.x3 = ROTR64(s.x3, 10); \
s.x0 ^= s.x4; \
s.x4 = ROTR64(s.x4, 7); \
t.x3 ^= s.x3; \
s.x2 ^= t.x2; \
s.x1 = ROTR64(s.x1, 61 - 39); \
t.x0 = s.x0; \
s.x2 = ~s.x2; \
s.x3 = ROTR64(s.x3, 17 - 10); \
t.x4 ^= s.x4; \
s.x4 = ROTR64(s.x4, 41 - 7); \
s.x3 ^= t.x3; \
s.x1 ^= t.x1; \
s.x0 = ROTR64(s.x0, 19); \
s.x4 ^= t.x4; \
t.x0 ^= s.x0; \
s.x0 = ROTR64(s.x0, 28 - 19); \
s.x0 ^= t.x0; \
} while (0)
// Applies the round permutation to the state pointed to by p, in place.
// rounds is the number of rounds to run; it selects the first round constant
// through START_CONSTANT(rounds).
void P(state *p, u8 rounds);
#endif // PERMUTATIONS_H_
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -23,6 +23,8 @@ ...@@ -23,6 +23,8 @@
#ifndef LW_INTERNAL_FORKSKINNY_H #ifndef LW_INTERNAL_FORKSKINNY_H
#define LW_INTERNAL_FORKSKINNY_H #define LW_INTERNAL_FORKSKINNY_H
#include "internal-util.h"
/** /**
* \file internal-forkskinny.h * \file internal-forkskinny.h
* \brief ForkSkinny block cipher family. * \brief ForkSkinny block cipher family.
...@@ -39,6 +41,158 @@ extern "C" { ...@@ -39,6 +41,158 @@ extern "C" {
#endif #endif
/** /**
* \brief State information for ForkSkinny-128-256.
*/
typedef struct
{
uint32_t TK1[4]; /**< First part of the tweakey */
uint32_t TK2[4]; /**< Second part of the tweakey */
uint32_t S[4]; /**< Current block state */
} forkskinny_128_256_state_t;
/**
* \brief State information for ForkSkinny-128-384.
*/
typedef struct
{
uint32_t TK1[4]; /**< First part of the tweakey */
uint32_t TK2[4]; /**< Second part of the tweakey */
uint32_t TK3[4]; /**< Third part of the tweakey */
uint32_t S[4]; /**< Current block state */
} forkskinny_128_384_state_t;
/**
* \brief State information for ForkSkinny-64-192.
*/
typedef struct
{
uint16_t TK1[4]; /**< First part of the tweakey */
uint16_t TK2[4]; /**< Second part of the tweakey */
uint16_t TK3[4]; /**< Third part of the tweakey */
uint16_t S[4]; /**< Current block state */
} forkskinny_64_192_state_t;
/**
* \brief Applies several rounds of ForkSkinny-128-256.
*
* \param state State to apply the rounds to.
* \param first First round to apply.
* \param last Last round to apply plus 1.
*/
void forkskinny_128_256_rounds
(forkskinny_128_256_state_t *state, unsigned first, unsigned last);
/**
* \brief Applies several rounds of ForkSkinny-128-256 in reverse.
*
* \param state State to apply the rounds to.
* \param first First round to apply plus 1.
* \param last Last round to apply.
*/
void forkskinny_128_256_inv_rounds
(forkskinny_128_256_state_t *state, unsigned first, unsigned last);
/**
* \brief Forwards the tweakey for ForkSkinny-128-256.
*
* \param state Points to the ForkSkinny-128-256 state.
* \param rounds Number of rounds to forward by.
*/
void forkskinny_128_256_forward_tk
(forkskinny_128_256_state_t *state, unsigned rounds);
/**
* \brief Reverses the tweakey for ForkSkinny-128-256.
*
* \param state Points to the ForkSkinny-128-256 state.
* \param rounds Number of rounds to reverse by.
*/
void forkskinny_128_256_reverse_tk
(forkskinny_128_256_state_t *state, unsigned rounds);
/**
* \brief Applies several rounds of ForkSkinny-128-384.
*
* \param state State to apply the rounds to.
* \param first First round to apply.
* \param last Last round to apply plus 1.
*/
void forkskinny_128_384_rounds
(forkskinny_128_384_state_t *state, unsigned first, unsigned last);
/**
* \brief Applies several rounds of ForkSkinny-128-384 in reverse.
*
* \param state State to apply the rounds to.
* \param first First round to apply plus 1.
* \param last Last round to apply.
*/
void forkskinny_128_384_inv_rounds
(forkskinny_128_384_state_t *state, unsigned first, unsigned last);
/**
* \brief Forwards the tweakey for ForkSkinny-128-384.
*
* \param state Points to the ForkSkinny-128-384 state.
* \param rounds Number of rounds to forward by.
*/
void forkskinny_128_384_forward_tk
(forkskinny_128_384_state_t *state, unsigned rounds);
/**
* \brief Reverses the tweakey for ForkSkinny-128-384.
*
* \param state Points to the ForkSkinny-128-384 state.
* \param rounds Number of rounds to reverse by.
*/
void forkskinny_128_384_reverse_tk
(forkskinny_128_384_state_t *state, unsigned rounds);
/**
* \brief Applies several rounds of ForkSkinny-64-192.
*
* \param state State to apply the rounds to.
* \param first First round to apply.
* \param last Last round to apply plus 1.
*
* Note: The cells of each row are ordered in big-endian nibble order
* so it is simplest to manage the rows in big-endian byte order.
*/
void forkskinny_64_192_rounds
(forkskinny_64_192_state_t *state, unsigned first, unsigned last);
/**
* \brief Applies several rounds of ForkSkinny-64-192 in reverse.
*
* \param state State to apply the rounds to.
* \param first First round to apply plus 1.
* \param last Last round to apply.
*/
void forkskinny_64_192_inv_rounds
(forkskinny_64_192_state_t *state, unsigned first, unsigned last);
/**
* \brief Forwards the tweakey for ForkSkinny-64-192.
*
* \param state Points to the ForkSkinny-64-192 state.
* \param rounds Number of rounds to forward by.
*/
void forkskinny_64_192_forward_tk
(forkskinny_64_192_state_t *state, unsigned rounds);
/**
* \brief Reverses the tweakey for ForkSkinny-64-192.
*
* \param state Points to the ForkSkinny-64-192 state.
* \param rounds Number of rounds to reverse by.
*/
void forkskinny_64_192_reverse_tk
(forkskinny_64_192_state_t *state, unsigned rounds);
/**
* \brief Encrypts a block of plaintext with ForkSkinny-128-256. * \brief Encrypts a block of plaintext with ForkSkinny-128-256.
* *
* \param key 256-bit tweakey for ForkSkinny-128-256. * \param key 256-bit tweakey for ForkSkinny-128-256.
......
...@@ -74,6 +74,21 @@ extern "C" { ...@@ -74,6 +74,21 @@ extern "C" {
( row3 & 0x00FF0000U); \ ( row3 & 0x00FF0000U); \
} while (0) } while (0)
/*
 * Permutes the bottom half of the tweakey state in place, without swapping
 * the halves. tk2 and tk3 must be assignable 32-bit words; each is read once
 * and then overwritten.
 *
 * Byte mapping, most significant byte first (b3..b0 are the input bytes):
 *   tk2' = [ tk3.b1, tk2.b0, tk3.b3, tk2.b1 ]
 *   tk3' = [ tk2.b3, tk3.b0, tk3.b2, tk2.b2 ]
 */
#define skinny128_permute_tk_half(tk2, tk3) \
    do { \
        const uint32_t in2_ = (tk2); \
        const uint32_t in3_ = (tk3); \
        /* tk3 with its 16-bit halves exchanged */ \
        const uint32_t swp_ = (in3_ << 16) | (in3_ >> 16); \
        (tk2) = (swp_ & 0xFF00FF00U) | \
                ((in2_ & 0x000000FFU) << 16) | \
                ((in2_ & 0x0000FF00U) >> 8); \
        (tk3) = (in2_ & 0xFF000000U) | \
                (swp_ & 0x00FF0000U) | \
                ((swp_ & 0x000000FFU) << 8) | \
                ((in2_ & 0x00FF0000U) >> 16); \
    } while (0)
#define skinny128_inv_permute_tk(tk) \ #define skinny128_inv_permute_tk(tk) \
do { \ do { \
/* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \
...@@ -91,6 +106,21 @@ extern "C" { ...@@ -91,6 +106,21 @@ extern "C" {
((row1 << 8) & 0x00FF0000U); \ ((row1 << 8) & 0x00FF0000U); \
} while (0) } while (0)
/*
 * Applies the inverse tweakey permutation to the top half of the tweakey
 * state in place, without swapping the halves. tk0 and tk1 must be
 * assignable 32-bit words; each is read once and then overwritten.
 *
 * Byte mapping, most significant byte first (b3..b0 are the input bytes):
 *   tk0' = [ tk1.b3, tk1.b0, tk0.b0, tk0.b2 ]
 *   tk1' = [ tk0.b1, tk1.b1, tk0.b3, tk1.b2 ]
 */
#define skinny128_inv_permute_tk_half(tk0, tk1) \
    do { \
        const uint32_t in0_ = (tk0); \
        const uint32_t in1_ = (tk1); \
        (tk0) = (in1_ & 0xFF000000U) | \
                ((in1_ & 0x000000FFU) << 16) | \
                ((in0_ & 0x000000FFU) << 8) | \
                ((in0_ & 0x00FF0000U) >> 16); \
        (tk1) = ((in0_ & 0x0000FF00U) << 16) | \
                ((in1_ & 0x0000FF00U) << 8) | \
                ((in0_ & 0xFF000000U) >> 16) | \
                ((in1_ & 0x00FF0000U) >> 16); \
    } while (0)
/* /*
* Apply the SKINNY sbox. The original version from the specification is * Apply the SKINNY sbox. The original version from the specification is
* equivalent to: * equivalent to:
......
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -23,6 +23,8 @@ ...@@ -23,6 +23,8 @@
#ifndef LW_INTERNAL_FORKSKINNY_H #ifndef LW_INTERNAL_FORKSKINNY_H
#define LW_INTERNAL_FORKSKINNY_H #define LW_INTERNAL_FORKSKINNY_H
#include "internal-util.h"
/** /**
* \file internal-forkskinny.h * \file internal-forkskinny.h
* \brief ForkSkinny block cipher family. * \brief ForkSkinny block cipher family.
...@@ -39,6 +41,158 @@ extern "C" { ...@@ -39,6 +41,158 @@ extern "C" {
#endif #endif
/** /**
* \brief State information for ForkSkinny-128-256.
*/
typedef struct
{
uint32_t TK1[4]; /**< First part of the tweakey */
uint32_t TK2[4]; /**< Second part of the tweakey */
uint32_t S[4]; /**< Current block state */
} forkskinny_128_256_state_t;
/**
* \brief State information for ForkSkinny-128-384.
*/
typedef struct
{
uint32_t TK1[4]; /**< First part of the tweakey */
uint32_t TK2[4]; /**< Second part of the tweakey */
uint32_t TK3[4]; /**< Third part of the tweakey */
uint32_t S[4]; /**< Current block state */
} forkskinny_128_384_state_t;
/**
* \brief State information for ForkSkinny-64-192.
*/
typedef struct
{
uint16_t TK1[4]; /**< First part of the tweakey */
uint16_t TK2[4]; /**< Second part of the tweakey */
uint16_t TK3[4]; /**< Third part of the tweakey */
uint16_t S[4]; /**< Current block state */
} forkskinny_64_192_state_t;
/**
* \brief Applies several rounds of ForkSkinny-128-256.
*
* \param state State to apply the rounds to.
* \param first First round to apply.
* \param last Last round to apply plus 1.
*/
void forkskinny_128_256_rounds
(forkskinny_128_256_state_t *state, unsigned first, unsigned last);
/**
* \brief Applies several rounds of ForkSkinny-128-256 in reverse.
*
* \param state State to apply the rounds to.
* \param first First round to apply plus 1.
* \param last Last round to apply.
*/
void forkskinny_128_256_inv_rounds
(forkskinny_128_256_state_t *state, unsigned first, unsigned last);
/**
* \brief Forwards the tweakey for ForkSkinny-128-256.
*
* \param state Points to the ForkSkinny-128-256 state.
* \param rounds Number of rounds to forward by.
*/
void forkskinny_128_256_forward_tk
(forkskinny_128_256_state_t *state, unsigned rounds);
/**
* \brief Reverses the tweakey for ForkSkinny-128-256.
*
* \param state Points to the ForkSkinny-128-256 state.
* \param rounds Number of rounds to reverse by.
*/
void forkskinny_128_256_reverse_tk
(forkskinny_128_256_state_t *state, unsigned rounds);
/**
* \brief Applies several rounds of ForkSkinny-128-384.
*
* \param state State to apply the rounds to.
* \param first First round to apply.
* \param last Last round to apply plus 1.
*/
void forkskinny_128_384_rounds
(forkskinny_128_384_state_t *state, unsigned first, unsigned last);
/**
* \brief Applies several rounds of ForkSkinny-128-384 in reverse.
*
* \param state State to apply the rounds to.
* \param first First round to apply plus 1.
* \param last Last round to apply.
*/
void forkskinny_128_384_inv_rounds
(forkskinny_128_384_state_t *state, unsigned first, unsigned last);
/**
* \brief Forwards the tweakey for ForkSkinny-128-384.
*
* \param state Points to the ForkSkinny-128-384 state.
* \param rounds Number of rounds to forward by.
*/
void forkskinny_128_384_forward_tk
(forkskinny_128_384_state_t *state, unsigned rounds);
/**
* \brief Reverses the tweakey for ForkSkinny-128-384.
*
* \param state Points to the ForkSkinny-128-384 state.
* \param rounds Number of rounds to reverse by.
*/
void forkskinny_128_384_reverse_tk
(forkskinny_128_384_state_t *state, unsigned rounds);
/**
* \brief Applies several rounds of ForkSkinny-64-192.
*
* \param state State to apply the rounds to.
* \param first First round to apply.
* \param last Last round to apply plus 1.
*
* Note: The cells of each row are ordered in big-endian nibble order
* so it is simplest to manage the rows in big-endian byte order.
*/
void forkskinny_64_192_rounds
(forkskinny_64_192_state_t *state, unsigned first, unsigned last);
/**
* \brief Applies several rounds of ForkSkinny-64-192 in reverse.
*
* \param state State to apply the rounds to.
* \param first First round to apply plus 1.
* \param last Last round to apply.
*/
void forkskinny_64_192_inv_rounds
(forkskinny_64_192_state_t *state, unsigned first, unsigned last);
/**
* \brief Forwards the tweakey for ForkSkinny-64-192.
*
* \param state Points to the ForkSkinny-64-192 state.
* \param rounds Number of rounds to forward by.
*/
void forkskinny_64_192_forward_tk
(forkskinny_64_192_state_t *state, unsigned rounds);
/**
* \brief Reverses the tweakey for ForkSkinny-64-192.
*
* \param state Points to the ForkSkinny-64-192 state.
* \param rounds Number of rounds to reverse by.
*/
void forkskinny_64_192_reverse_tk
(forkskinny_64_192_state_t *state, unsigned rounds);
/**
* \brief Encrypts a block of plaintext with ForkSkinny-128-256. * \brief Encrypts a block of plaintext with ForkSkinny-128-256.
* *
* \param key 256-bit tweakey for ForkSkinny-128-256. * \param key 256-bit tweakey for ForkSkinny-128-256.
......
...@@ -74,6 +74,21 @@ extern "C" { ...@@ -74,6 +74,21 @@ extern "C" {
( row3 & 0x00FF0000U); \ ( row3 & 0x00FF0000U); \
} while (0) } while (0)
/*
 * Permutes the bottom half of the tweakey state in place, without swapping
 * the halves. tk2 and tk3 must be assignable 32-bit words; each is read once
 * and then overwritten.
 *
 * Byte mapping, most significant byte first (b3..b0 are the input bytes):
 *   tk2' = [ tk3.b1, tk2.b0, tk3.b3, tk2.b1 ]
 *   tk3' = [ tk2.b3, tk3.b0, tk3.b2, tk2.b2 ]
 */
#define skinny128_permute_tk_half(tk2, tk3) \
    do { \
        const uint32_t in2_ = (tk2); \
        const uint32_t in3_ = (tk3); \
        /* tk3 with its 16-bit halves exchanged */ \
        const uint32_t swp_ = (in3_ << 16) | (in3_ >> 16); \
        (tk2) = (swp_ & 0xFF00FF00U) | \
                ((in2_ & 0x000000FFU) << 16) | \
                ((in2_ & 0x0000FF00U) >> 8); \
        (tk3) = (in2_ & 0xFF000000U) | \
                (swp_ & 0x00FF0000U) | \
                ((swp_ & 0x000000FFU) << 8) | \
                ((in2_ & 0x00FF0000U) >> 16); \
    } while (0)
#define skinny128_inv_permute_tk(tk) \ #define skinny128_inv_permute_tk(tk) \
do { \ do { \
/* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \
...@@ -91,6 +106,21 @@ extern "C" { ...@@ -91,6 +106,21 @@ extern "C" {
((row1 << 8) & 0x00FF0000U); \ ((row1 << 8) & 0x00FF0000U); \
} while (0) } while (0)
/*
 * Applies the inverse tweakey permutation to the top half of the tweakey
 * state in place, without swapping the halves. tk0 and tk1 must be
 * assignable 32-bit words; each is read once and then overwritten.
 *
 * Byte mapping, most significant byte first (b3..b0 are the input bytes):
 *   tk0' = [ tk1.b3, tk1.b0, tk0.b0, tk0.b2 ]
 *   tk1' = [ tk0.b1, tk1.b1, tk0.b3, tk1.b2 ]
 */
#define skinny128_inv_permute_tk_half(tk0, tk1) \
    do { \
        const uint32_t in0_ = (tk0); \
        const uint32_t in1_ = (tk1); \
        (tk0) = (in1_ & 0xFF000000U) | \
                ((in1_ & 0x000000FFU) << 16) | \
                ((in0_ & 0x000000FFU) << 8) | \
                ((in0_ & 0x00FF0000U) >> 16); \
        (tk1) = ((in0_ & 0x0000FF00U) << 16) | \
                ((in1_ & 0x0000FF00U) << 8) | \
                ((in0_ & 0xFF000000U) >> 16) | \
                ((in1_ & 0x00FF0000U) >> 16); \
    } while (0)
/* /*
* Apply the SKINNY sbox. The original version from the specification is * Apply the SKINNY sbox. The original version from the specification is
* equivalent to: * equivalent to:
......
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -23,6 +23,8 @@ ...@@ -23,6 +23,8 @@
#ifndef LW_INTERNAL_FORKSKINNY_H #ifndef LW_INTERNAL_FORKSKINNY_H
#define LW_INTERNAL_FORKSKINNY_H #define LW_INTERNAL_FORKSKINNY_H
#include "internal-util.h"
/** /**
* \file internal-forkskinny.h * \file internal-forkskinny.h
* \brief ForkSkinny block cipher family. * \brief ForkSkinny block cipher family.
...@@ -39,6 +41,158 @@ extern "C" { ...@@ -39,6 +41,158 @@ extern "C" {
#endif #endif
/** /**
* \brief State information for ForkSkinny-128-256.
*/
typedef struct
{
uint32_t TK1[4]; /**< First part of the tweakey */
uint32_t TK2[4]; /**< Second part of the tweakey */
uint32_t S[4]; /**< Current block state */
} forkskinny_128_256_state_t;
/**
* \brief State information for ForkSkinny-128-384.
*/
typedef struct
{
uint32_t TK1[4]; /**< First part of the tweakey */
uint32_t TK2[4]; /**< Second part of the tweakey */
uint32_t TK3[4]; /**< Third part of the tweakey */
uint32_t S[4]; /**< Current block state */
} forkskinny_128_384_state_t;
/**
* \brief State information for ForkSkinny-64-192.
*/
typedef struct
{
uint16_t TK1[4]; /**< First part of the tweakey */
uint16_t TK2[4]; /**< Second part of the tweakey */
uint16_t TK3[4]; /**< Third part of the tweakey */
uint16_t S[4]; /**< Current block state */
} forkskinny_64_192_state_t;
/**
* \brief Applies several rounds of ForkSkinny-128-256.
*
* \param state State to apply the rounds to.
* \param first First round to apply.
* \param last Last round to apply plus 1.
*/
void forkskinny_128_256_rounds
(forkskinny_128_256_state_t *state, unsigned first, unsigned last);
/**
* \brief Applies several rounds of ForkSkinny-128-256 in reverse.
*
* \param state State to apply the rounds to.
* \param first First round to apply plus 1.
* \param last Last round to apply.
*/
void forkskinny_128_256_inv_rounds
(forkskinny_128_256_state_t *state, unsigned first, unsigned last);
/**
* \brief Forwards the tweakey for ForkSkinny-128-256.
*
* \param state Points to the ForkSkinny-128-256 state.
* \param rounds Number of rounds to forward by.
*/
void forkskinny_128_256_forward_tk
(forkskinny_128_256_state_t *state, unsigned rounds);
/**
* \brief Reverses the tweakey for ForkSkinny-128-256.
*
* \param state Points to the ForkSkinny-128-256 state.
* \param rounds Number of rounds to reverse by.
*/
void forkskinny_128_256_reverse_tk
(forkskinny_128_256_state_t *state, unsigned rounds);
/**
* \brief Applies several rounds of ForkSkinny-128-384.
*
* \param state State to apply the rounds to.
* \param first First round to apply.
* \param last Last round to apply plus 1.
*/
void forkskinny_128_384_rounds
(forkskinny_128_384_state_t *state, unsigned first, unsigned last);
/**
* \brief Applies several rounds of ForkSkinny-128-384 in reverse.
*
* \param state State to apply the rounds to.
* \param first First round to apply plus 1.
* \param last Last round to apply.
*/
void forkskinny_128_384_inv_rounds
(forkskinny_128_384_state_t *state, unsigned first, unsigned last);
/**
* \brief Forwards the tweakey for ForkSkinny-128-384.
*
* \param state Points to the ForkSkinny-128-384 state.
* \param rounds Number of rounds to forward by.
*/
void forkskinny_128_384_forward_tk
(forkskinny_128_384_state_t *state, unsigned rounds);
/**
* \brief Reverses the tweakey for ForkSkinny-128-384.
*
* \param state Points to the ForkSkinny-128-384 state.
* \param rounds Number of rounds to reverse by.
*/
void forkskinny_128_384_reverse_tk
(forkskinny_128_384_state_t *state, unsigned rounds);
/**
* \brief Applies several rounds of ForkSkinny-64-192.
*
* \param state State to apply the rounds to.
* \param first First round to apply.
* \param last Last round to apply plus 1.
*
* Note: The cells of each row are ordered in big-endian nibble order
* so it is simplest to manage the rows in big-endian byte order.
*/
void forkskinny_64_192_rounds
(forkskinny_64_192_state_t *state, unsigned first, unsigned last);
/**
* \brief Applies several rounds of ForkSkinny-64-192 in reverse.
*
* \param state State to apply the rounds to.
* \param first First round to apply plus 1.
* \param last Last round to apply.
*/
void forkskinny_64_192_inv_rounds
(forkskinny_64_192_state_t *state, unsigned first, unsigned last);
/**
* \brief Forwards the tweakey for ForkSkinny-64-192.
*
* \param state Points to the ForkSkinny-64-192 state.
* \param rounds Number of rounds to forward by.
*/
void forkskinny_64_192_forward_tk
(forkskinny_64_192_state_t *state, unsigned rounds);
/**
* \brief Reverses the tweakey for ForkSkinny-64-192.
*
* \param state Points to the ForkSkinny-64-192 state.
* \param rounds Number of rounds to reverse by.
*/
void forkskinny_64_192_reverse_tk
(forkskinny_64_192_state_t *state, unsigned rounds);
/**
* \brief Encrypts a block of plaintext with ForkSkinny-128-256. * \brief Encrypts a block of plaintext with ForkSkinny-128-256.
* *
* \param key 256-bit tweakey for ForkSkinny-128-256. * \param key 256-bit tweakey for ForkSkinny-128-256.
......
...@@ -74,6 +74,21 @@ extern "C" { ...@@ -74,6 +74,21 @@ extern "C" {
( row3 & 0x00FF0000U); \ ( row3 & 0x00FF0000U); \
} while (0) } while (0)
/*
 * Permutes the bottom half of the tweakey state in place, without swapping
 * the halves. tk2 and tk3 must be assignable 32-bit words; each is read once
 * and then overwritten.
 *
 * Byte mapping, most significant byte first (b3..b0 are the input bytes):
 *   tk2' = [ tk3.b1, tk2.b0, tk3.b3, tk2.b1 ]
 *   tk3' = [ tk2.b3, tk3.b0, tk3.b2, tk2.b2 ]
 */
#define skinny128_permute_tk_half(tk2, tk3) \
    do { \
        const uint32_t in2_ = (tk2); \
        const uint32_t in3_ = (tk3); \
        /* tk3 with its 16-bit halves exchanged */ \
        const uint32_t swp_ = (in3_ << 16) | (in3_ >> 16); \
        (tk2) = (swp_ & 0xFF00FF00U) | \
                ((in2_ & 0x000000FFU) << 16) | \
                ((in2_ & 0x0000FF00U) >> 8); \
        (tk3) = (in2_ & 0xFF000000U) | \
                (swp_ & 0x00FF0000U) | \
                ((swp_ & 0x000000FFU) << 8) | \
                ((in2_ & 0x00FF0000U) >> 16); \
    } while (0)
#define skinny128_inv_permute_tk(tk) \ #define skinny128_inv_permute_tk(tk) \
do { \ do { \
/* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \
...@@ -91,6 +106,21 @@ extern "C" { ...@@ -91,6 +106,21 @@ extern "C" {
((row1 << 8) & 0x00FF0000U); \ ((row1 << 8) & 0x00FF0000U); \
} while (0) } while (0)
/*
 * Applies the inverse tweakey permutation to the top half of the tweakey
 * state in place, without swapping the halves. tk0 and tk1 must be
 * assignable 32-bit words; each is read once and then overwritten.
 *
 * Byte mapping, most significant byte first (b3..b0 are the input bytes):
 *   tk0' = [ tk1.b3, tk1.b0, tk0.b0, tk0.b2 ]
 *   tk1' = [ tk0.b1, tk1.b1, tk0.b3, tk1.b2 ]
 */
#define skinny128_inv_permute_tk_half(tk0, tk1) \
    do { \
        const uint32_t in0_ = (tk0); \
        const uint32_t in1_ = (tk1); \
        (tk0) = (in1_ & 0xFF000000U) | \
                ((in1_ & 0x000000FFU) << 16) | \
                ((in0_ & 0x000000FFU) << 8) | \
                ((in0_ & 0x00FF0000U) >> 16); \
        (tk1) = ((in0_ & 0x0000FF00U) << 16) | \
                ((in1_ & 0x0000FF00U) << 8) | \
                ((in0_ & 0xFF000000U) >> 16) | \
                ((in1_ & 0x00FF0000U) >> 16); \
    } while (0)
/* /*
* Apply the SKINNY sbox. The original version from the specification is * Apply the SKINNY sbox. The original version from the specification is
* equivalent to: * equivalent to:
......
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -23,6 +23,8 @@ ...@@ -23,6 +23,8 @@
#ifndef LW_INTERNAL_FORKSKINNY_H #ifndef LW_INTERNAL_FORKSKINNY_H
#define LW_INTERNAL_FORKSKINNY_H #define LW_INTERNAL_FORKSKINNY_H
#include "internal-util.h"
/** /**
* \file internal-forkskinny.h * \file internal-forkskinny.h
* \brief ForkSkinny block cipher family. * \brief ForkSkinny block cipher family.
...@@ -39,6 +41,158 @@ extern "C" { ...@@ -39,6 +41,158 @@ extern "C" {
#endif #endif
/** /**
* \brief State information for ForkSkinny-128-256.
*/
typedef struct
{
uint32_t TK1[4]; /**< First part of the tweakey */
uint32_t TK2[4]; /**< Second part of the tweakey */
uint32_t S[4]; /**< Current block state */
} forkskinny_128_256_state_t;
/**
* \brief State information for ForkSkinny-128-384.
*/
typedef struct
{
uint32_t TK1[4]; /**< First part of the tweakey */
uint32_t TK2[4]; /**< Second part of the tweakey */
uint32_t TK3[4]; /**< Third part of the tweakey */
uint32_t S[4]; /**< Current block state */
} forkskinny_128_384_state_t;
/**
* \brief State information for ForkSkinny-64-192.
*/
typedef struct
{
uint16_t TK1[4]; /**< First part of the tweakey */
uint16_t TK2[4]; /**< Second part of the tweakey */
uint16_t TK3[4]; /**< Third part of the tweakey */
uint16_t S[4]; /**< Current block state */
} forkskinny_64_192_state_t;
/**
* \brief Applies several rounds of ForkSkinny-128-256.
*
* \param state State to apply the rounds to.
* \param first First round to apply.
* \param last Last round to apply plus 1.
*/
void forkskinny_128_256_rounds
(forkskinny_128_256_state_t *state, unsigned first, unsigned last);
/**
* \brief Applies several rounds of ForkSkinny-128-256 in reverse.
*
* \param state State to apply the rounds to.
* \param first First round to apply plus 1.
* \param last Last round to apply.
*/
void forkskinny_128_256_inv_rounds
(forkskinny_128_256_state_t *state, unsigned first, unsigned last);
/**
* \brief Forwards the tweakey for ForkSkinny-128-256.
*
* \param state Points to the ForkSkinny-128-256 state.
* \param rounds Number of rounds to forward by.
*/
void forkskinny_128_256_forward_tk
(forkskinny_128_256_state_t *state, unsigned rounds);
/**
* \brief Reverses the tweakey for ForkSkinny-128-256.
*
* \param state Points to the ForkSkinny-128-256 state.
* \param rounds Number of rounds to reverse by.
*/
void forkskinny_128_256_reverse_tk
(forkskinny_128_256_state_t *state, unsigned rounds);
/**
* \brief Applies several rounds of ForkSkinny-128-384.
*
* \param state State to apply the rounds to.
* \param first First round to apply.
* \param last Last round to apply plus 1.
*/
void forkskinny_128_384_rounds
(forkskinny_128_384_state_t *state, unsigned first, unsigned last);
/**
* \brief Applies several rounds of ForkSkinny-128-384 in reverse.
*
* \param state State to apply the rounds to.
* \param first First round to apply plus 1.
* \param last Last round to apply.
*/
void forkskinny_128_384_inv_rounds
(forkskinny_128_384_state_t *state, unsigned first, unsigned last);
/**
* \brief Forwards the tweakey for ForkSkinny-128-384.
*
* \param state Points to the ForkSkinny-128-384 state.
* \param rounds Number of rounds to forward by.
*/
void forkskinny_128_384_forward_tk
(forkskinny_128_384_state_t *state, unsigned rounds);
/**
* \brief Reverses the tweakey for ForkSkinny-128-384.
*
* \param state Points to the ForkSkinny-128-384 state.
* \param rounds Number of rounds to reverse by.
*/
void forkskinny_128_384_reverse_tk
(forkskinny_128_384_state_t *state, unsigned rounds);
/**
* \brief Applies several rounds of ForkSkinny-64-192.
*
* \param state State to apply the rounds to.
* \param first First round to apply.
* \param last Last round to apply plus 1.
*
* Note: The cells of each row are ordered in big-endian nibble order
* so it is simplest to manage the rows in big-endian byte order.
*/
void forkskinny_64_192_rounds
(forkskinny_64_192_state_t *state, unsigned first, unsigned last);
/**
* \brief Applies several rounds of ForkSkinny-64-192 in reverse.
*
* \param state State to apply the rounds to.
* \param first First round to apply plus 1.
* \param last Last round to apply.
*/
void forkskinny_64_192_inv_rounds
(forkskinny_64_192_state_t *state, unsigned first, unsigned last);
/**
* \brief Forwards the tweakey for ForkSkinny-64-192.
*
* \param state Points to the ForkSkinny-64-192 state.
* \param rounds Number of rounds to forward by.
*/
void forkskinny_64_192_forward_tk
(forkskinny_64_192_state_t *state, unsigned rounds);
/**
* \brief Reverses the tweakey for ForkSkinny-64-192.
*
* \param state Points to the ForkSkinny-64-192 state.
* \param rounds Number of rounds to reverse by.
*/
void forkskinny_64_192_reverse_tk
(forkskinny_64_192_state_t *state, unsigned rounds);
/**
* \brief Encrypts a block of plaintext with ForkSkinny-128-256. * \brief Encrypts a block of plaintext with ForkSkinny-128-256.
* *
* \param key 256-bit tweakey for ForkSkinny-128-256. * \param key 256-bit tweakey for ForkSkinny-128-256.
......
...@@ -74,6 +74,21 @@ extern "C" { ...@@ -74,6 +74,21 @@ extern "C" {
( row3 & 0x00FF0000U); \ ( row3 & 0x00FF0000U); \
} while (0) } while (0)
/*
 * Permutes the bottom half of the tweakey state in place, without swapping
 * the halves. tk2 and tk3 must be assignable 32-bit words; each is read once
 * and then overwritten.
 *
 * Byte mapping, most significant byte first (b3..b0 are the input bytes):
 *   tk2' = [ tk3.b1, tk2.b0, tk3.b3, tk2.b1 ]
 *   tk3' = [ tk2.b3, tk3.b0, tk3.b2, tk2.b2 ]
 */
#define skinny128_permute_tk_half(tk2, tk3) \
    do { \
        const uint32_t in2_ = (tk2); \
        const uint32_t in3_ = (tk3); \
        /* tk3 with its 16-bit halves exchanged */ \
        const uint32_t swp_ = (in3_ << 16) | (in3_ >> 16); \
        (tk2) = (swp_ & 0xFF00FF00U) | \
                ((in2_ & 0x000000FFU) << 16) | \
                ((in2_ & 0x0000FF00U) >> 8); \
        (tk3) = (in2_ & 0xFF000000U) | \
                (swp_ & 0x00FF0000U) | \
                ((swp_ & 0x000000FFU) << 8) | \
                ((in2_ & 0x00FF0000U) >> 16); \
    } while (0)
#define skinny128_inv_permute_tk(tk) \ #define skinny128_inv_permute_tk(tk) \
do { \ do { \
/* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \
...@@ -91,6 +106,21 @@ extern "C" { ...@@ -91,6 +106,21 @@ extern "C" {
((row1 << 8) & 0x00FF0000U); \ ((row1 << 8) & 0x00FF0000U); \
} while (0) } while (0)
/*
 * Applies the inverse tweakey permutation to the top half of the tweakey
 * state in place, without swapping the halves. tk0 and tk1 must be
 * assignable 32-bit words; each is read once and then overwritten.
 *
 * Byte mapping, most significant byte first (b3..b0 are the input bytes):
 *   tk0' = [ tk1.b3, tk1.b0, tk0.b0, tk0.b2 ]
 *   tk1' = [ tk0.b1, tk1.b1, tk0.b3, tk1.b2 ]
 */
#define skinny128_inv_permute_tk_half(tk0, tk1) \
    do { \
        const uint32_t in0_ = (tk0); \
        const uint32_t in1_ = (tk1); \
        (tk0) = (in1_ & 0xFF000000U) | \
                ((in1_ & 0x000000FFU) << 16) | \
                ((in0_ & 0x000000FFU) << 8) | \
                ((in0_ & 0x00FF0000U) >> 16); \
        (tk1) = ((in0_ & 0x0000FF00U) << 16) | \
                ((in1_ & 0x0000FF00U) << 8) | \
                ((in0_ & 0xFF000000U) >> 16) | \
                ((in1_ & 0x00FF0000U) >> 16); \
    } while (0)
/* /*
* Apply the SKINNY sbox. The original version from the specification is * Apply the SKINNY sbox. The original version from the specification is
* equivalent to: * equivalent to:
......
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -23,6 +23,8 @@ ...@@ -23,6 +23,8 @@
#ifndef LW_INTERNAL_FORKSKINNY_H #ifndef LW_INTERNAL_FORKSKINNY_H
#define LW_INTERNAL_FORKSKINNY_H #define LW_INTERNAL_FORKSKINNY_H
#include "internal-util.h"
/** /**
* \file internal-forkskinny.h * \file internal-forkskinny.h
* \brief ForkSkinny block cipher family. * \brief ForkSkinny block cipher family.
...@@ -39,6 +41,158 @@ extern "C" { ...@@ -39,6 +41,158 @@ extern "C" {
#endif #endif
/**
 * \brief State information for ForkSkinny-128-256.
 *
 * Every member is an array of four 32-bit words, i.e. 128 bits per member.
 * This is the state type taken by forkskinny_128_256_rounds(),
 * forkskinny_128_256_inv_rounds(), forkskinny_128_256_forward_tk() and
 * forkskinny_128_256_reverse_tk().
 *
 * NOTE(review): the member order may be relied upon by non-C
 * implementations of those functions - confirm before reordering.
 */
typedef struct
{
uint32_t TK1[4]; /**< First part of the tweakey */
uint32_t TK2[4]; /**< Second part of the tweakey */
uint32_t S[4]; /**< Current block state */
} forkskinny_128_256_state_t;
/**
 * \brief State information for ForkSkinny-128-384.
 *
 * Every member is an array of four 32-bit words, i.e. 128 bits per member.
 * Compared with the ForkSkinny-128-256 state there is one additional
 * tweakey part, TK3. This is the state type taken by
 * forkskinny_128_384_rounds(), forkskinny_128_384_inv_rounds(),
 * forkskinny_128_384_forward_tk() and forkskinny_128_384_reverse_tk().
 *
 * NOTE(review): the member order may be relied upon by non-C
 * implementations of those functions - confirm before reordering.
 */
typedef struct
{
uint32_t TK1[4]; /**< First part of the tweakey */
uint32_t TK2[4]; /**< Second part of the tweakey */
uint32_t TK3[4]; /**< Third part of the tweakey */
uint32_t S[4]; /**< Current block state */
} forkskinny_128_384_state_t;
/**
 * \brief State information for ForkSkinny-64-192.
 *
 * Every member is an array of four 16-bit words, i.e. 64 bits per member.
 * This is the state type taken by forkskinny_64_192_rounds(),
 * forkskinny_64_192_inv_rounds(), forkskinny_64_192_forward_tk() and
 * forkskinny_64_192_reverse_tk().
 *
 * NOTE(review): the member order may be relied upon by non-C
 * implementations of those functions - confirm before reordering.
 */
typedef struct
{
uint16_t TK1[4]; /**< First part of the tweakey */
uint16_t TK2[4]; /**< Second part of the tweakey */
uint16_t TK3[4]; /**< Third part of the tweakey */
uint16_t S[4]; /**< Current block state */
} forkskinny_64_192_state_t;
/**
 * \brief Applies several rounds of ForkSkinny-128-256.
 *
 * The two bounds form a half-open range: round \a first is applied and
 * round \a last is not.
 *
 * \param state State to apply the rounds to.
 * \param first First round to apply.
 * \param last Last round to apply plus 1.
 */
void forkskinny_128_256_rounds
(forkskinny_128_256_state_t *state, unsigned first, unsigned last);
/**
 * \brief Applies several rounds of ForkSkinny-128-256 in reverse.
 *
 * Note that the bound convention is mirrored relative to
 * forkskinny_128_256_rounds(): here \a first is one more than the first
 * round that is processed and \a last is the final round that is processed.
 *
 * \param state State to apply the rounds to.
 * \param first First round to apply plus 1.
 * \param last Last round to apply.
 */
void forkskinny_128_256_inv_rounds
(forkskinny_128_256_state_t *state, unsigned first, unsigned last);
/**
 * \brief Forwards the tweakey for ForkSkinny-128-256.
 *
 * NOTE(review): presumably only the tweakey members of \a state are
 * advanced and the block state S is left alone - confirm against the
 * implementation.
 *
 * \param state Points to the ForkSkinny-128-256 state.
 * \param rounds Number of rounds to forward by.
 */
void forkskinny_128_256_forward_tk
(forkskinny_128_256_state_t *state, unsigned rounds);
/**
 * \brief Reverses the tweakey for ForkSkinny-128-256.
 *
 * NOTE(review): presumably the counterpart of
 * forkskinny_128_256_forward_tk() for the same \a rounds value - confirm
 * against the implementation.
 *
 * \param state Points to the ForkSkinny-128-256 state.
 * \param rounds Number of rounds to reverse by.
 */
void forkskinny_128_256_reverse_tk
(forkskinny_128_256_state_t *state, unsigned rounds);
/**
 * \brief Applies several rounds of ForkSkinny-128-384.
 *
 * The two bounds form a half-open range: round \a first is applied and
 * round \a last is not.
 *
 * \param state State to apply the rounds to.
 * \param first First round to apply.
 * \param last Last round to apply plus 1.
 */
void forkskinny_128_384_rounds
(forkskinny_128_384_state_t *state, unsigned first, unsigned last);
/**
 * \brief Applies several rounds of ForkSkinny-128-384 in reverse.
 *
 * Note that the bound convention is mirrored relative to
 * forkskinny_128_384_rounds(): here \a first is one more than the first
 * round that is processed and \a last is the final round that is processed.
 *
 * \param state State to apply the rounds to.
 * \param first First round to apply plus 1.
 * \param last Last round to apply.
 */
void forkskinny_128_384_inv_rounds
(forkskinny_128_384_state_t *state, unsigned first, unsigned last);
/**
 * \brief Forwards the tweakey for ForkSkinny-128-384.
 *
 * NOTE(review): presumably only the tweakey members of \a state are
 * advanced and the block state S is left alone - confirm against the
 * implementation.
 *
 * \param state Points to the ForkSkinny-128-384 state.
 * \param rounds Number of rounds to forward by.
 */
void forkskinny_128_384_forward_tk
(forkskinny_128_384_state_t *state, unsigned rounds);
/**
 * \brief Reverses the tweakey for ForkSkinny-128-384.
 *
 * NOTE(review): presumably the counterpart of
 * forkskinny_128_384_forward_tk() for the same \a rounds value - confirm
 * against the implementation.
 *
 * \param state Points to the ForkSkinny-128-384 state.
 * \param rounds Number of rounds to reverse by.
 */
void forkskinny_128_384_reverse_tk
(forkskinny_128_384_state_t *state, unsigned rounds);
/**
 * \brief Applies several rounds of ForkSkinny-64-192.
 *
 * The two bounds form a half-open range: round \a first is applied and
 * round \a last is not.
 *
 * \param state State to apply the rounds to.
 * \param first First round to apply.
 * \param last Last round to apply plus 1.
 *
 * Note: The cells of each row are ordered in big-endian nibble order
 * so it is simplest to manage the rows in big-endian byte order.
 */
void forkskinny_64_192_rounds
(forkskinny_64_192_state_t *state, unsigned first, unsigned last);
/**
 * \brief Applies several rounds of ForkSkinny-64-192 in reverse.
 *
 * Note that the bound convention is mirrored relative to
 * forkskinny_64_192_rounds(): here \a first is one more than the first
 * round that is processed and \a last is the final round that is processed.
 *
 * \param state State to apply the rounds to.
 * \param first First round to apply plus 1.
 * \param last Last round to apply.
 */
void forkskinny_64_192_inv_rounds
(forkskinny_64_192_state_t *state, unsigned first, unsigned last);
/**
 * \brief Forwards the tweakey for ForkSkinny-64-192.
 *
 * NOTE(review): presumably only the tweakey members of \a state are
 * advanced and the block state S is left alone - confirm against the
 * implementation.
 *
 * \param state Points to the ForkSkinny-64-192 state.
 * \param rounds Number of rounds to forward by.
 */
void forkskinny_64_192_forward_tk
(forkskinny_64_192_state_t *state, unsigned rounds);
/**
 * \brief Reverses the tweakey for ForkSkinny-64-192.
 *
 * NOTE(review): presumably the counterpart of
 * forkskinny_64_192_forward_tk() for the same \a rounds value - confirm
 * against the implementation.
 *
 * \param state Points to the ForkSkinny-64-192 state.
 * \param rounds Number of rounds to reverse by.
 */
void forkskinny_64_192_reverse_tk
(forkskinny_64_192_state_t *state, unsigned rounds);
/**
* \brief Encrypts a block of plaintext with ForkSkinny-128-256. * \brief Encrypts a block of plaintext with ForkSkinny-128-256.
* *
* \param key 256-bit tweakey for ForkSkinny-128-256. * \param key 256-bit tweakey for ForkSkinny-128-256.
......
...@@ -74,6 +74,21 @@ extern "C" { ...@@ -74,6 +74,21 @@ extern "C" {
( row3 & 0x00FF0000U); \ ( row3 & 0x00FF0000U); \
} while (0) } while (0)
/*
 * Applies the byte-level tweakey permutation to a pair of 32-bit row words,
 * updating both arguments in place. Unlike a full tweakey permutation, the
 * two halves of the tweakey are not exchanged by this macro.
 *
 * Both arguments must be modifiable uint32_t lvalues; each is read once and
 * written once.
 */
#define skinny128_permute_tk_half(tk2, tk3) \
    do { \
        /* Snapshot both rows before either argument is overwritten */ \
        uint32_t tkp_lo = (tk2); \
        uint32_t tkp_hi = (tk3); \
        /* Exchange the 16-bit halves of the second row up front */ \
        tkp_hi = (tkp_hi >> 16) | (tkp_hi << 16); \
        (tk2) = ( tkp_hi        & 0xFF00FF00U) | \
                ((tkp_lo << 16) & 0x00FF0000U) | \
                ((tkp_lo >>  8) & 0x000000FFU); \
        (tk3) = ( tkp_lo        & 0xFF000000U) | \
                ( tkp_hi        & 0x00FF0000U) | \
                ((tkp_hi <<  8) & 0x0000FF00U) | \
                ((tkp_lo >> 16) & 0x000000FFU); \
    } while (0)
#define skinny128_inv_permute_tk(tk) \ #define skinny128_inv_permute_tk(tk) \
do { \ do { \
/* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \
...@@ -91,6 +106,21 @@ extern "C" { ...@@ -91,6 +106,21 @@ extern "C" {
((row1 << 8) & 0x00FF0000U); \ ((row1 << 8) & 0x00FF0000U); \
} while (0) } while (0)
/*
 * Applies the inverse byte-level tweakey permutation to a pair of 32-bit
 * row words, updating both arguments in place. The two halves of the
 * tweakey are not exchanged by this macro. It undoes the byte shuffle
 * performed by skinny128_permute_tk_half().
 *
 * Both arguments must be modifiable uint32_t lvalues; each is read once and
 * written once.
 */
#define skinny128_inv_permute_tk_half(tk0, tk1) \
    do { \
        /* Snapshot both rows before either argument is overwritten */ \
        uint32_t tki_a = (tk0); \
        uint32_t tki_b = (tk1); \
        (tk0) = ( tki_b        & 0xFF000000U) | \
                ((tki_b << 16) & 0x00FF0000U) | \
                ((tki_a <<  8) & 0x0000FF00U) | \
                ((tki_a >> 16) & 0x000000FFU); \
        (tk1) = ((tki_a << 16) & 0xFF000000U) | \
                ((tki_b <<  8) & 0x00FF0000U) | \
                ((tki_a >> 16) & 0x0000FF00U) | \
                ((tki_b >> 16) & 0x000000FFU); \
    } while (0)
/* /*
* Apply the SKINNY sbox. The original version from the specification is * Apply the SKINNY sbox. The original version from the specification is
* equivalent to: * equivalent to:
......
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -23,6 +23,8 @@ ...@@ -23,6 +23,8 @@
#ifndef LW_INTERNAL_FORKSKINNY_H #ifndef LW_INTERNAL_FORKSKINNY_H
#define LW_INTERNAL_FORKSKINNY_H #define LW_INTERNAL_FORKSKINNY_H
#include "internal-util.h"
/** /**
* \file internal-forkskinny.h * \file internal-forkskinny.h
* \brief ForkSkinny block cipher family. * \brief ForkSkinny block cipher family.
...@@ -39,6 +41,158 @@ extern "C" { ...@@ -39,6 +41,158 @@ extern "C" {
#endif #endif
/**
 * \brief State information for ForkSkinny-128-256.
 *
 * Every member is an array of four 32-bit words, i.e. 128 bits per member.
 * This is the state type taken by forkskinny_128_256_rounds(),
 * forkskinny_128_256_inv_rounds(), forkskinny_128_256_forward_tk() and
 * forkskinny_128_256_reverse_tk().
 *
 * NOTE(review): the member order may be relied upon by non-C
 * implementations of those functions - confirm before reordering.
 */
typedef struct
{
uint32_t TK1[4]; /**< First part of the tweakey */
uint32_t TK2[4]; /**< Second part of the tweakey */
uint32_t S[4]; /**< Current block state */
} forkskinny_128_256_state_t;
/**
 * \brief State information for ForkSkinny-128-384.
 *
 * Every member is an array of four 32-bit words, i.e. 128 bits per member.
 * Compared with the ForkSkinny-128-256 state there is one additional
 * tweakey part, TK3. This is the state type taken by
 * forkskinny_128_384_rounds(), forkskinny_128_384_inv_rounds(),
 * forkskinny_128_384_forward_tk() and forkskinny_128_384_reverse_tk().
 *
 * NOTE(review): the member order may be relied upon by non-C
 * implementations of those functions - confirm before reordering.
 */
typedef struct
{
uint32_t TK1[4]; /**< First part of the tweakey */
uint32_t TK2[4]; /**< Second part of the tweakey */
uint32_t TK3[4]; /**< Third part of the tweakey */
uint32_t S[4]; /**< Current block state */
} forkskinny_128_384_state_t;
/**
 * \brief State information for ForkSkinny-64-192.
 *
 * Every member is an array of four 16-bit words, i.e. 64 bits per member.
 * This is the state type taken by forkskinny_64_192_rounds(),
 * forkskinny_64_192_inv_rounds(), forkskinny_64_192_forward_tk() and
 * forkskinny_64_192_reverse_tk().
 *
 * NOTE(review): the member order may be relied upon by non-C
 * implementations of those functions - confirm before reordering.
 */
typedef struct
{
uint16_t TK1[4]; /**< First part of the tweakey */
uint16_t TK2[4]; /**< Second part of the tweakey */
uint16_t TK3[4]; /**< Third part of the tweakey */
uint16_t S[4]; /**< Current block state */
} forkskinny_64_192_state_t;
/**
 * \brief Applies several rounds of ForkSkinny-128-256.
 *
 * The two bounds form a half-open range: round \a first is applied and
 * round \a last is not.
 *
 * \param state State to apply the rounds to.
 * \param first First round to apply.
 * \param last Last round to apply plus 1.
 */
void forkskinny_128_256_rounds
(forkskinny_128_256_state_t *state, unsigned first, unsigned last);
/**
 * \brief Applies several rounds of ForkSkinny-128-256 in reverse.
 *
 * Note that the bound convention is mirrored relative to
 * forkskinny_128_256_rounds(): here \a first is one more than the first
 * round that is processed and \a last is the final round that is processed.
 *
 * \param state State to apply the rounds to.
 * \param first First round to apply plus 1.
 * \param last Last round to apply.
 */
void forkskinny_128_256_inv_rounds
(forkskinny_128_256_state_t *state, unsigned first, unsigned last);
/**
 * \brief Forwards the tweakey for ForkSkinny-128-256.
 *
 * NOTE(review): presumably only the tweakey members of \a state are
 * advanced and the block state S is left alone - confirm against the
 * implementation.
 *
 * \param state Points to the ForkSkinny-128-256 state.
 * \param rounds Number of rounds to forward by.
 */
void forkskinny_128_256_forward_tk
(forkskinny_128_256_state_t *state, unsigned rounds);
/**
 * \brief Reverses the tweakey for ForkSkinny-128-256.
 *
 * NOTE(review): presumably the counterpart of
 * forkskinny_128_256_forward_tk() for the same \a rounds value - confirm
 * against the implementation.
 *
 * \param state Points to the ForkSkinny-128-256 state.
 * \param rounds Number of rounds to reverse by.
 */
void forkskinny_128_256_reverse_tk
(forkskinny_128_256_state_t *state, unsigned rounds);
/**
 * \brief Applies several rounds of ForkSkinny-128-384.
 *
 * The two bounds form a half-open range: round \a first is applied and
 * round \a last is not.
 *
 * \param state State to apply the rounds to.
 * \param first First round to apply.
 * \param last Last round to apply plus 1.
 */
void forkskinny_128_384_rounds
(forkskinny_128_384_state_t *state, unsigned first, unsigned last);
/**
 * \brief Applies several rounds of ForkSkinny-128-384 in reverse.
 *
 * Note that the bound convention is mirrored relative to
 * forkskinny_128_384_rounds(): here \a first is one more than the first
 * round that is processed and \a last is the final round that is processed.
 *
 * \param state State to apply the rounds to.
 * \param first First round to apply plus 1.
 * \param last Last round to apply.
 */
void forkskinny_128_384_inv_rounds
(forkskinny_128_384_state_t *state, unsigned first, unsigned last);
/**
 * \brief Forwards the tweakey for ForkSkinny-128-384.
 *
 * NOTE(review): presumably only the tweakey members of \a state are
 * advanced and the block state S is left alone - confirm against the
 * implementation.
 *
 * \param state Points to the ForkSkinny-128-384 state.
 * \param rounds Number of rounds to forward by.
 */
void forkskinny_128_384_forward_tk
(forkskinny_128_384_state_t *state, unsigned rounds);
/**
 * \brief Reverses the tweakey for ForkSkinny-128-384.
 *
 * NOTE(review): presumably the counterpart of
 * forkskinny_128_384_forward_tk() for the same \a rounds value - confirm
 * against the implementation.
 *
 * \param state Points to the ForkSkinny-128-384 state.
 * \param rounds Number of rounds to reverse by.
 */
void forkskinny_128_384_reverse_tk
(forkskinny_128_384_state_t *state, unsigned rounds);
/**
 * \brief Applies several rounds of ForkSkinny-64-192.
 *
 * The two bounds form a half-open range: round \a first is applied and
 * round \a last is not.
 *
 * \param state State to apply the rounds to.
 * \param first First round to apply.
 * \param last Last round to apply plus 1.
 *
 * Note: The cells of each row are ordered in big-endian nibble order
 * so it is simplest to manage the rows in big-endian byte order.
 */
void forkskinny_64_192_rounds
(forkskinny_64_192_state_t *state, unsigned first, unsigned last);
/**
 * \brief Applies several rounds of ForkSkinny-64-192 in reverse.
 *
 * Note that the bound convention is mirrored relative to
 * forkskinny_64_192_rounds(): here \a first is one more than the first
 * round that is processed and \a last is the final round that is processed.
 *
 * \param state State to apply the rounds to.
 * \param first First round to apply plus 1.
 * \param last Last round to apply.
 */
void forkskinny_64_192_inv_rounds
(forkskinny_64_192_state_t *state, unsigned first, unsigned last);
/**
 * \brief Forwards the tweakey for ForkSkinny-64-192.
 *
 * NOTE(review): presumably only the tweakey members of \a state are
 * advanced and the block state S is left alone - confirm against the
 * implementation.
 *
 * \param state Points to the ForkSkinny-64-192 state.
 * \param rounds Number of rounds to forward by.
 */
void forkskinny_64_192_forward_tk
(forkskinny_64_192_state_t *state, unsigned rounds);
/**
 * \brief Reverses the tweakey for ForkSkinny-64-192.
 *
 * NOTE(review): presumably the counterpart of
 * forkskinny_64_192_forward_tk() for the same \a rounds value - confirm
 * against the implementation.
 *
 * \param state Points to the ForkSkinny-64-192 state.
 * \param rounds Number of rounds to reverse by.
 */
void forkskinny_64_192_reverse_tk
(forkskinny_64_192_state_t *state, unsigned rounds);
/**
* \brief Encrypts a block of plaintext with ForkSkinny-128-256. * \brief Encrypts a block of plaintext with ForkSkinny-128-256.
* *
* \param key 256-bit tweakey for ForkSkinny-128-256. * \param key 256-bit tweakey for ForkSkinny-128-256.
......
...@@ -74,6 +74,21 @@ extern "C" { ...@@ -74,6 +74,21 @@ extern "C" {
( row3 & 0x00FF0000U); \ ( row3 & 0x00FF0000U); \
} while (0) } while (0)
/*
 * Applies the byte-level tweakey permutation to a pair of 32-bit row words,
 * updating both arguments in place. Unlike a full tweakey permutation, the
 * two halves of the tweakey are not exchanged by this macro.
 *
 * Both arguments must be modifiable uint32_t lvalues; each is read once and
 * written once.
 */
#define skinny128_permute_tk_half(tk2, tk3) \
    do { \
        /* Snapshot both rows before either argument is overwritten */ \
        uint32_t tkp_lo = (tk2); \
        uint32_t tkp_hi = (tk3); \
        /* Exchange the 16-bit halves of the second row up front */ \
        tkp_hi = (tkp_hi >> 16) | (tkp_hi << 16); \
        (tk2) = ( tkp_hi        & 0xFF00FF00U) | \
                ((tkp_lo << 16) & 0x00FF0000U) | \
                ((tkp_lo >>  8) & 0x000000FFU); \
        (tk3) = ( tkp_lo        & 0xFF000000U) | \
                ( tkp_hi        & 0x00FF0000U) | \
                ((tkp_hi <<  8) & 0x0000FF00U) | \
                ((tkp_lo >> 16) & 0x000000FFU); \
    } while (0)
#define skinny128_inv_permute_tk(tk) \ #define skinny128_inv_permute_tk(tk) \
do { \ do { \
/* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \ /* PT' = [8, 9, 10, 11, 12, 13, 14, 15, 2, 0, 4, 7, 6, 3, 5, 1] */ \
...@@ -91,6 +106,21 @@ extern "C" { ...@@ -91,6 +106,21 @@ extern "C" {
((row1 << 8) & 0x00FF0000U); \ ((row1 << 8) & 0x00FF0000U); \
} while (0) } while (0)
/*
 * Applies the inverse byte-level tweakey permutation to a pair of 32-bit
 * row words, updating both arguments in place. The two halves of the
 * tweakey are not exchanged by this macro. It undoes the byte shuffle
 * performed by skinny128_permute_tk_half().
 *
 * Both arguments must be modifiable uint32_t lvalues; each is read once and
 * written once.
 */
#define skinny128_inv_permute_tk_half(tk0, tk1) \
    do { \
        /* Snapshot both rows before either argument is overwritten */ \
        uint32_t tki_a = (tk0); \
        uint32_t tki_b = (tk1); \
        (tk0) = ( tki_b        & 0xFF000000U) | \
                ((tki_b << 16) & 0x00FF0000U) | \
                ((tki_a <<  8) & 0x0000FF00U) | \
                ((tki_a >> 16) & 0x000000FFU); \
        (tk1) = ((tki_a << 16) & 0xFF000000U) | \
                ((tki_b <<  8) & 0x00FF0000U) | \
                ((tki_a >> 16) & 0x0000FF00U) | \
                ((tki_b >> 16) & 0x000000FFU); \
    } while (0)
/* /*
* Apply the SKINNY sbox. The original version from the specification is * Apply the SKINNY sbox. The original version from the specification is
* equivalent to: * equivalent to:
......
...@@ -33,6 +33,8 @@ ...@@ -33,6 +33,8 @@
* combination of a 128-bit linear feedback shift register (LFSR) and a * combination of a 128-bit linear feedback shift register (LFSR) and a
* 128-bit non-linear feedback shift register (NFSR). It is a member of * 128-bit non-linear feedback shift register (NFSR). It is a member of
* the Grain family of stream ciphers. * the Grain family of stream ciphers.
*
* References: https://grain-128aead.github.io/
*/ */
#ifdef __cplusplus #ifdef __cplusplus
......
...@@ -26,14 +26,9 @@ ...@@ -26,14 +26,9 @@
#define GWORD(a, b, start_bit) \ #define GWORD(a, b, start_bit) \
(((a) << ((start_bit) % 32)) ^ ((b) >> (32 - ((start_bit) % 32)))) (((a) << ((start_bit) % 32)) ^ ((b) >> (32 - ((start_bit) % 32))))
/** #if !defined(__AVR__)
* \brief Performs 32 rounds of Grain-128 in parallel.
* void grain128_core
* \param state Grain-128 state.
* \param x 32 bits of input to be incorporated into the LFSR state, or zero.
* \param x2 Another 32 bits to be incorporated into the NFSR state, or zero.
*/
static void grain128_core
(grain128_state_t *state, uint32_t x, uint32_t x2) (grain128_state_t *state, uint32_t x, uint32_t x2)
{ {
uint32_t s0, s1, s2, s3; uint32_t s0, s1, s2, s3;
...@@ -67,7 +62,7 @@ static void grain128_core ...@@ -67,7 +62,7 @@ static void grain128_core
/* Perform the NFSR feedback algorithm from the specification: /* Perform the NFSR feedback algorithm from the specification:
* *
* b'[i] = b[i + 1] * b'[i] = b[i + 1]
* b'[127] = s'[127] ^ b[0] ^ b[26] ^ b[56] ^ b[91] ^ b[96] * b'[127] = s[0] ^ b[0] ^ b[26] ^ b[56] ^ b[91] ^ b[96]
* ^ (b[3] & b[67]) ^ (b[11] & b[13]) ^ (b[17] & b[18]) * ^ (b[3] & b[67]) ^ (b[11] & b[13]) ^ (b[17] & b[18])
* ^ (b[27] & b[59]) ^ (b[40] & b[48]) ^ (b[61] & b[65]) * ^ (b[27] & b[59]) ^ (b[40] & b[48]) ^ (b[61] & b[65])
* ^ (b[68] & b[84]) ^ (b[22] & b[24] & b[25]) * ^ (b[68] & b[84]) ^ (b[22] & b[24] & b[25])
...@@ -106,14 +101,19 @@ static void grain128_core ...@@ -106,14 +101,19 @@ static void grain128_core
state->nfsr[3] = x2; state->nfsr[3] = x2;
} }
/** #define grain128_preoutput grain128_preoutput_inner
* \brief Generates 32 bits of pre-output data. #define grain128_preoutput_setup(state) grain128_preoutput((state))
*
* \param state Grain-128 state. #else /* __AVR__ */
*
* \return The generated 32 bits of pre-output data. /* For some reason, the AVR assembly preoutput doesn't work for key setup
*/ * but does work everywhere else. Investigate and fix this later. */
static uint32_t grain128_preoutput(const grain128_state_t *state) uint32_t grain128_preoutput(const grain128_state_t *state);
#define grain128_preoutput_setup(state) grain128_preoutput_inner((state))
#endif /* __AVR__ */
uint32_t grain128_preoutput_inner(const grain128_state_t *state)
{ {
uint32_t s0, s1, s2, s3; uint32_t s0, s1, s2, s3;
uint32_t b0, b1, b2, b3; uint32_t b0, b1, b2, b3;
...@@ -170,12 +170,37 @@ static uint32_t grain128_preoutput(const grain128_state_t *state) ...@@ -170,12 +170,37 @@ static uint32_t grain128_preoutput(const grain128_state_t *state)
(_y) = (((_y) & (mask)) << (shift)) | (((_y) >> (shift)) & (mask)); \ (_y) = (((_y) & (mask)) << (shift)) | (((_y) >> (shift)) & (mask)); \
} while (0) } while (0)
/* Enable the out-of-line helper routines declared below when building for AVR */
#if defined(__AVR__)
#define GRAIN128_ASM_HELPERS 1
#endif
#if defined(GRAIN128_ASM_HELPERS)
/*
 * NOTE(review): only the declarations of these helpers appear here;
 * presumably the definitions live in a separate assembly source - confirm.
 */
/**
 * \brief Loads a 32-bit word and swaps it from big-endian bit order
 * into little-endian bit order.
 *
 * When GRAIN128_ASM_HELPERS is defined, the SWAP_BITS() macro in
 * grain128_setup() calls this function in place of be_load_word32()
 * followed by the bit_permute_step_simple() sequence.
 *
 * \param data Points to the word to be loaded.
 * \return Little-endian version of the 32-bit word at \a data.
 */
uint32_t grain128_swap_word32(const unsigned char *data);
/**
 * \brief Interleaves the bits in a 16-byte keystream block to separate
 * out the even and odd bits.
 *
 * When GRAIN128_ASM_HELPERS is defined, grain128_next_keystream() stores
 * each pre-output word into the keystream buffer with le_store_word32()
 * and then calls this function once on the whole buffer.
 *
 * \param ks Points to the keystream block.
 */
void grain128_interleave(unsigned char *ks);
#endif
void grain128_setup void grain128_setup
(grain128_state_t *state, const unsigned char *key, (grain128_state_t *state, const unsigned char *key,
const unsigned char *nonce) const unsigned char *nonce)
{ {
uint32_t k[4]; uint32_t k[4];
unsigned round; uint8_t round;
/* Internally, the Grain-128 stream cipher uses big endian bit /* Internally, the Grain-128 stream cipher uses big endian bit
* order, but the Grain-128AEAD specification for NIST uses little * order, but the Grain-128AEAD specification for NIST uses little
...@@ -187,26 +212,33 @@ void grain128_setup ...@@ -187,26 +212,33 @@ void grain128_setup
* P = [7 6 5 4 3 2 1 0 15 14 13 12 11 10 9 8 * P = [7 6 5 4 3 2 1 0 15 14 13 12 11 10 9 8
* 23 22 21 20 19 18 17 16 31 30 29 28 27 26 25 24] * 23 22 21 20 19 18 17 16 31 30 29 28 27 26 25 24]
*/ */
#if defined(GRAIN128_ASM_HELPERS)
#define SWAP_BITS(out, in) \ #define SWAP_BITS(out, in) \
do { \ do { \
uint32_t tmp = (in); \ (out) = grain128_swap_word32((in)); \
} while (0)
#else
#define SWAP_BITS(out, in) \
do { \
uint32_t tmp = be_load_word32((in)); \
bit_permute_step_simple(tmp, 0x55555555, 1); \ bit_permute_step_simple(tmp, 0x55555555, 1); \
bit_permute_step_simple(tmp, 0x33333333, 2); \ bit_permute_step_simple(tmp, 0x33333333, 2); \
bit_permute_step_simple(tmp, 0x0f0f0f0f, 4); \ bit_permute_step_simple(tmp, 0x0f0f0f0f, 4); \
(out) = tmp; \ (out) = tmp; \
} while (0) } while (0)
#endif
/* Initialize the LFSR state with the nonce and padding */ /* Initialize the LFSR state with the nonce and padding */
SWAP_BITS(state->lfsr[0], be_load_word32(nonce)); SWAP_BITS(state->lfsr[0], nonce);
SWAP_BITS(state->lfsr[1], be_load_word32(nonce + 4)); SWAP_BITS(state->lfsr[1], nonce + 4);
SWAP_BITS(state->lfsr[2], be_load_word32(nonce + 8)); SWAP_BITS(state->lfsr[2], nonce + 8);
state->lfsr[3] = 0xFFFFFFFEU; /* pad with all-1s and a terminating 0 */ state->lfsr[3] = 0xFFFFFFFEU; /* pad with all-1s and a terminating 0 */
/* Initialize the NFSR state with the key */ /* Initialize the NFSR state with the key */
SWAP_BITS(k[0], be_load_word32(key)); SWAP_BITS(k[0], key);
SWAP_BITS(k[1], be_load_word32(key + 4)); SWAP_BITS(k[1], key + 4);
SWAP_BITS(k[2], be_load_word32(key + 8)); SWAP_BITS(k[2], key + 8);
SWAP_BITS(k[3], be_load_word32(key + 12)); SWAP_BITS(k[3], key + 12);
state->nfsr[0] = k[0]; state->nfsr[0] = k[0];
state->nfsr[1] = k[1]; state->nfsr[1] = k[1];
state->nfsr[2] = k[2]; state->nfsr[2] = k[2];
...@@ -215,7 +247,7 @@ void grain128_setup ...@@ -215,7 +247,7 @@ void grain128_setup
/* Perform 256 rounds of Grain-128 to mix up the initial state. /* Perform 256 rounds of Grain-128 to mix up the initial state.
* The rounds can be performed 32 at a time: 32 * 8 = 256 */ * The rounds can be performed 32 at a time: 32 * 8 = 256 */
for (round = 0; round < 8; ++round) { for (round = 0; round < 8; ++round) {
uint32_t y = grain128_preoutput(state); uint32_t y = grain128_preoutput_setup(state);
grain128_core(state, y, y); grain128_core(state, y, y);
} }
...@@ -241,6 +273,7 @@ void grain128_setup ...@@ -241,6 +273,7 @@ void grain128_setup
*/ */
static void grain128_next_keystream(grain128_state_t *state) static void grain128_next_keystream(grain128_state_t *state)
{ {
#if !defined(GRAIN128_ASM_HELPERS)
unsigned posn; unsigned posn;
for (posn = 0; posn < sizeof(state->ks); posn += 4) { for (posn = 0; posn < sizeof(state->ks); posn += 4) {
/* Get the next word of pre-output and run the Grain-128 core */ /* Get the next word of pre-output and run the Grain-128 core */
...@@ -264,6 +297,16 @@ static void grain128_next_keystream(grain128_state_t *state) ...@@ -264,6 +297,16 @@ static void grain128_next_keystream(grain128_state_t *state)
bit_permute_step_simple(x, 0x00ff00ff, 8); bit_permute_step_simple(x, 0x00ff00ff, 8);
be_store_word32(state->ks + posn, x); be_store_word32(state->ks + posn, x);
} }
#else
/* Generate the data and then perform the interleaving */
unsigned posn;
for (posn = 0; posn < sizeof(state->ks); posn += 4) {
uint32_t x = grain128_preoutput(state);
le_store_word32(state->ks + posn, x);
grain128_core(state, 0, 0);
}
grain128_interleave(state->ks);
#endif
} }
void grain128_authenticate void grain128_authenticate
...@@ -394,6 +437,8 @@ void grain128_decrypt ...@@ -394,6 +437,8 @@ void grain128_decrypt
state->posn = posn; state->posn = posn;
} }
#if !defined(__AVR__)
void grain128_compute_tag(grain128_state_t *state) void grain128_compute_tag(grain128_state_t *state)
{ {
uint64_t x; uint64_t x;
...@@ -409,3 +454,5 @@ void grain128_compute_tag(grain128_state_t *state) ...@@ -409,3 +454,5 @@ void grain128_compute_tag(grain128_state_t *state)
bit_permute_step_simple(x, 0x0f0f0f0f0f0f0f0fULL, 4); bit_permute_step_simple(x, 0x0f0f0f0f0f0f0f0fULL, 4);
be_store_word64(state->ks, x); be_store_word64(state->ks, x);
} }
#endif /* !__AVR__ */
...@@ -28,6 +28,8 @@ ...@@ -28,6 +28,8 @@
/** /**
* \file internal-grain128.h * \file internal-grain128.h
* \brief Internal implementation of the Grain-128 stream cipher. * \brief Internal implementation of the Grain-128 stream cipher.
*
* References: https://grain-128aead.github.io/
*/ */
#ifdef __cplusplus #ifdef __cplusplus
...@@ -52,6 +54,25 @@ typedef struct ...@@ -52,6 +54,25 @@ typedef struct
} grain128_state_t; } grain128_state_t;
/** /**
* \brief Performs 32 rounds of Grain-128 in parallel.
*
* \param state Grain-128 state.
* \param x 32 bits of input to be incorporated into the LFSR state, or zero.
* \param x2 Another 32 bits to be incorporated into the NFSR state, or zero.
*/
void grain128_core
(grain128_state_t *state, uint32_t x, uint32_t x2);
/**
* \brief Generates 32 bits of pre-output data.
*
* \param state Grain-128 state.
*
* \return The generated 32 bits of pre-output data.
*/
uint32_t grain128_preoutput(const grain128_state_t *state);
/**
* \brief Sets up the initial Grain-128 state with the key and nonce. * \brief Sets up the initial Grain-128 state with the key and nonce.
* *
* \param state Grain-128 state to be initialized. * \param state Grain-128 state to be initialized.
......
...@@ -9,7 +9,7 @@ int crypto_aead_encrypt ...@@ -9,7 +9,7 @@ int crypto_aead_encrypt
const unsigned char *npub, const unsigned char *npub,
const unsigned char *k) const unsigned char *k)
{ {
return hyena_aead_encrypt return hyena_v1_aead_encrypt
(c, clen, m, mlen, ad, adlen, nsec, npub, k); (c, clen, m, mlen, ad, adlen, nsec, npub, k);
} }
...@@ -21,6 +21,6 @@ int crypto_aead_decrypt ...@@ -21,6 +21,6 @@ int crypto_aead_decrypt
const unsigned char *npub, const unsigned char *npub,
const unsigned char *k) const unsigned char *k)
{ {
return hyena_aead_decrypt return hyena_v1_aead_decrypt
(m, mlen, nsec, c, clen, ad, adlen, npub, k); (m, mlen, nsec, c, clen, ad, adlen, npub, k);
} }
...@@ -33,6 +33,12 @@ ...@@ -33,6 +33,12 @@
* GIFT-128 block cipher. The algorithm has a 128-bit key, a 96-bit nonce, * GIFT-128 block cipher. The algorithm has a 128-bit key, a 96-bit nonce,
* and a 128-bit authentication tag. * and a 128-bit authentication tag.
* *
* This library implements both the v1 and v2 versions of HYENA from the
* authors. The v1 version was submitted to the second round of the
* NIST Lightweight Cryptography Competition but was later found to have a
* forgery attack. The authors fixed this with v2 but it was too late to
* submit the update for the second round.
*
* References: https://www.isical.ac.in/~lightweight/hyena/ * References: https://www.isical.ac.in/~lightweight/hyena/
*/ */
...@@ -56,12 +62,76 @@ extern "C" { ...@@ -56,12 +62,76 @@ extern "C" {
#define HYENA_NONCE_SIZE 12 #define HYENA_NONCE_SIZE 12
/** /**
* \brief Meta-information block for the HYENA cipher. * \brief Meta-information block for the HYENA-v1 cipher.
*/
extern aead_cipher_t const hyena_v1_cipher;
/**
* \brief Meta-information block for the HYENA-v2 cipher.
*/
extern aead_cipher_t const hyena_v2_cipher;
/**
* \brief Encrypts and authenticates a packet with HYENA-v1.
*
* \param c Buffer to receive the output.
* \param clen On exit, set to the length of the output which includes
* the ciphertext and the 16 byte authentication tag.
* \param m Buffer that contains the plaintext message to encrypt.
* \param mlen Length of the plaintext message in bytes.
* \param ad Buffer that contains associated data to authenticate
* along with the packet but which does not need to be encrypted.
* \param adlen Length of the associated data in bytes.
* \param nsec Secret nonce - not used by this algorithm.
* \param npub Points to the public nonce for the packet which must
* be 12 bytes in length.
* \param k Points to the 16 bytes of the key to use to encrypt the packet.
*
* \return 0 on success, or a negative value if there was an error in
* the parameters.
*
* \sa hyena_v1_aead_decrypt()
*/ */
extern aead_cipher_t const hyena_cipher; int hyena_v1_aead_encrypt
(unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k);
/**
 * \brief Decrypts and authenticates a packet with HYENA-v1.
 *
 * \param m Buffer to receive the plaintext message on output.
 * \param mlen Receives the length of the plaintext message on output.
 * \param nsec Secret nonce - not used by this algorithm.
 * \param c Buffer that contains the ciphertext and authentication
 * tag to decrypt.
 * \param clen Length of the input data in bytes, which includes the
 * ciphertext and the 16 byte authentication tag.
 * \param ad Buffer that contains associated data to authenticate
 * along with the packet but which does not need to be encrypted.
 * \param adlen Length of the associated data in bytes.
 * \param npub Points to the public nonce for the packet which must
 * be 12 bytes in length.
 * \param k Points to the 16 bytes of the key to use to decrypt the packet.
 *
 * \return 0 on success, -1 if the authentication tag was incorrect,
 * or some other negative number if there was an error in the parameters.
 *
 * \sa hyena_v1_aead_encrypt()
 */
int hyena_v1_aead_decrypt
(unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k);
/** /**
* \brief Encrypts and authenticates a packet with HYENA. * \brief Encrypts and authenticates a packet with HYENA-v2.
* *
* \param c Buffer to receive the output. * \param c Buffer to receive the output.
* \param clen On exit, set to the length of the output which includes * \param clen On exit, set to the length of the output which includes
...@@ -81,7 +151,7 @@ extern aead_cipher_t const hyena_cipher; ...@@ -81,7 +151,7 @@ extern aead_cipher_t const hyena_cipher;
* *
* \sa hyena_aead_decrypt() * \sa hyena_aead_decrypt()
*/ */
int hyena_aead_encrypt int hyena_v2_aead_encrypt
(unsigned char *c, unsigned long long *clen, (unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen, const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen, const unsigned char *ad, unsigned long long adlen,
...@@ -90,7 +160,7 @@ int hyena_aead_encrypt ...@@ -90,7 +160,7 @@ int hyena_aead_encrypt
const unsigned char *k); const unsigned char *k);
/** /**
* \brief Decrypts and authenticates a packet with HYENA. * \brief Decrypts and authenticates a packet with HYENA-v2.
* *
* \param m Buffer to receive the plaintext message on output. * \param m Buffer to receive the plaintext message on output.
* \param mlen Receives the length of the plaintext message on output. * \param mlen Receives the length of the plaintext message on output.
...@@ -111,7 +181,7 @@ int hyena_aead_encrypt ...@@ -111,7 +181,7 @@ int hyena_aead_encrypt
* *
* \sa hyena_aead_encrypt() * \sa hyena_aead_encrypt()
*/ */
int hyena_aead_decrypt int hyena_v2_aead_decrypt
(unsigned char *m, unsigned long long *mlen, (unsigned char *m, unsigned long long *mlen,
unsigned char *nsec, unsigned char *nsec,
const unsigned char *c, unsigned long long clen, const unsigned char *c, unsigned long long clen,
......
...@@ -47,11 +47,13 @@ ...@@ -47,11 +47,13 @@
* in any of the NIST submissions so we don't bother with it in this library. * in any of the NIST submissions so we don't bother with it in this library.
* *
* References: https://eprint.iacr.org/2017/622.pdf, * References: https://eprint.iacr.org/2017/622.pdf,
* https://eprint.iacr.org/2020/412.pdf,
* https://giftcipher.github.io/gift/ * https://giftcipher.github.io/gift/
*/ */
#include <stddef.h> #include <stddef.h>
#include <stdint.h> #include <stdint.h>
#include "internal-gift128-config.h"
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
...@@ -63,16 +65,23 @@ extern "C" { ...@@ -63,16 +65,23 @@ extern "C" {
#define GIFT128_BLOCK_SIZE 16 #define GIFT128_BLOCK_SIZE 16
/** /**
* \brief Number of round keys for the fixsliced representation of GIFT-128. * \var GIFT128_ROUND_KEYS
* \brief Number of round keys for the GIFT-128 key schedule.
*/ */
#if GIFT128_VARIANT == GIFT128_VARIANT_TINY
#define GIFT128_ROUND_KEYS 4
#elif GIFT128_VARIANT == GIFT128_VARIANT_SMALL
#define GIFT128_ROUND_KEYS 20
#else
#define GIFT128_ROUND_KEYS 80 #define GIFT128_ROUND_KEYS 80
#endif
/** /**
* \brief Structure of the key schedule for GIFT-128 (bit-sliced). * \brief Structure of the key schedule for GIFT-128 (bit-sliced).
*/ */
typedef struct typedef struct
{ {
/** Pre-computed round keys in the fixsliced form */ /** Pre-computed round keys for bit-sliced GIFT-128 */
uint32_t k[GIFT128_ROUND_KEYS]; uint32_t k[GIFT128_ROUND_KEYS];
} gift128b_key_schedule_t; } gift128b_key_schedule_t;
...@@ -81,14 +90,9 @@ typedef struct ...@@ -81,14 +90,9 @@ typedef struct
* \brief Initializes the key schedule for GIFT-128 (bit-sliced). * \brief Initializes the key schedule for GIFT-128 (bit-sliced).
* *
* \param ks Points to the key schedule to initialize. * \param ks Points to the key schedule to initialize.
* \param key Points to the key data. * \param key Points to the 16 bytes of the key data.
* \param key_len Length of the key data, which must be 16.
*
* \return Non-zero on success or zero if there is something wrong
* with the parameters.
*/ */
int gift128b_init void gift128b_init(gift128b_key_schedule_t *ks, const unsigned char *key);
(gift128b_key_schedule_t *ks, const unsigned char *key, size_t key_len);
/** /**
* \brief Encrypts a 128-bit block with GIFT-128 (bit-sliced). * \brief Encrypts a 128-bit block with GIFT-128 (bit-sliced).
...@@ -145,14 +149,9 @@ typedef gift128b_key_schedule_t gift128n_key_schedule_t; ...@@ -145,14 +149,9 @@ typedef gift128b_key_schedule_t gift128n_key_schedule_t;
* \brief Initializes the key schedule for GIFT-128 (nibble-based). * \brief Initializes the key schedule for GIFT-128 (nibble-based).
* *
* \param ks Points to the key schedule to initialize. * \param ks Points to the key schedule to initialize.
* \param key Points to the key data. * \param key Points to the 16 bytes of the key data.
* \param key_len Length of the key data, which must be 16.
*
* \return Non-zero on success or zero if there is something wrong
* with the parameters.
*/ */
int gift128n_init void gift128n_init(gift128n_key_schedule_t *ks, const unsigned char *key);
(gift128n_key_schedule_t *ks, const unsigned char *key, size_t key_len);
/** /**
* \brief Encrypts a 128-bit block with GIFT-128 (nibble-based). * \brief Encrypts a 128-bit block with GIFT-128 (nibble-based).
...@@ -182,13 +181,31 @@ void gift128n_decrypt ...@@ -182,13 +181,31 @@ void gift128n_decrypt
(const gift128n_key_schedule_t *ks, unsigned char *output, (const gift128n_key_schedule_t *ks, unsigned char *output,
const unsigned char *input); const unsigned char *input);
/* 4-bit tweak values expanded to 32-bit for TweGIFT-128 */
#define GIFT128T_TWEAK_0 0x00000000 /**< TweGIFT-128 tweak value 0 */
#define GIFT128T_TWEAK_1 0xe1e1e1e1 /**< TweGIFT-128 tweak value 1 */
#define GIFT128T_TWEAK_2 0xd2d2d2d2 /**< TweGIFT-128 tweak value 2 */
#define GIFT128T_TWEAK_3 0x33333333 /**< TweGIFT-128 tweak value 3 */
#define GIFT128T_TWEAK_4 0xb4b4b4b4 /**< TweGIFT-128 tweak value 4 */
#define GIFT128T_TWEAK_5 0x55555555 /**< TweGIFT-128 tweak value 5 */
#define GIFT128T_TWEAK_6 0x66666666 /**< TweGIFT-128 tweak value 6 */
#define GIFT128T_TWEAK_7 0x87878787 /**< TweGIFT-128 tweak value 7 */
#define GIFT128T_TWEAK_8 0x78787878 /**< TweGIFT-128 tweak value 8 */
#define GIFT128T_TWEAK_9 0x99999999 /**< TweGIFT-128 tweak value 9 */
#define GIFT128T_TWEAK_10 0xaaaaaaaa /**< TweGIFT-128 tweak value 10 */
#define GIFT128T_TWEAK_11 0x4b4b4b4b /**< TweGIFT-128 tweak value 11 */
#define GIFT128T_TWEAK_12 0xcccccccc /**< TweGIFT-128 tweak value 12 */
#define GIFT128T_TWEAK_13 0x2d2d2d2d /**< TweGIFT-128 tweak value 13 */
#define GIFT128T_TWEAK_14 0x1e1e1e1e /**< TweGIFT-128 tweak value 14 */
#define GIFT128T_TWEAK_15 0xffffffff /**< TweGIFT-128 tweak value 15 */
/** /**
* \brief Encrypts a 128-bit block with TweGIFT-128 (tweakable variant). * \brief Encrypts a 128-bit block with TweGIFT-128 (tweakable variant).
* *
* \param ks Points to the GIFT-128 key schedule. * \param ks Points to the GIFT-128 key schedule.
* \param output Output buffer which must be at least 16 bytes in length. * \param output Output buffer which must be at least 16 bytes in length.
* \param input Input buffer which must be at least 16 bytes in length. * \param input Input buffer which must be at least 16 bytes in length.
* \param tweak 4-bit tweak value. * \param tweak 4-bit tweak value expanded to 32-bit.
* *
* The \a input and \a output buffers can be the same buffer for * The \a input and \a output buffers can be the same buffer for
* in-place encryption. * in-place encryption.
...@@ -200,7 +217,7 @@ void gift128n_decrypt ...@@ -200,7 +217,7 @@ void gift128n_decrypt
*/ */
void gift128t_encrypt void gift128t_encrypt
(const gift128n_key_schedule_t *ks, unsigned char *output, (const gift128n_key_schedule_t *ks, unsigned char *output,
const unsigned char *input, unsigned char tweak); const unsigned char *input, uint32_t tweak);
/** /**
* \brief Decrypts a 128-bit block with TweGIFT-128 (tweakable variant). * \brief Decrypts a 128-bit block with TweGIFT-128 (tweakable variant).
...@@ -208,7 +225,7 @@ void gift128t_encrypt ...@@ -208,7 +225,7 @@ void gift128t_encrypt
* \param ks Points to the GIFT-128 key schedule. * \param ks Points to the GIFT-128 key schedule.
* \param output Output buffer which must be at least 16 bytes in length. * \param output Output buffer which must be at least 16 bytes in length.
* \param input Input buffer which must be at least 16 bytes in length. * \param input Input buffer which must be at least 16 bytes in length.
* \param tweak 4-bit tweak value. * \param tweak 4-bit tweak value expanded to 32-bit.
* *
* The \a input and \a output buffers can be the same buffer for * The \a input and \a output buffers can be the same buffer for
* in-place encryption. * in-place encryption.
...@@ -220,7 +237,7 @@ void gift128t_encrypt ...@@ -220,7 +237,7 @@ void gift128t_encrypt
*/ */
void gift128t_decrypt void gift128t_decrypt
(const gift128n_key_schedule_t *ks, unsigned char *output, (const gift128n_key_schedule_t *ks, unsigned char *output,
const unsigned char *input, unsigned char tweak); const unsigned char *input, uint32_t tweak);
#ifdef __cplusplus #ifdef __cplusplus
} }
......
...@@ -238,6 +238,17 @@ ...@@ -238,6 +238,17 @@
} \ } \
} while (0) } while (0)
/* Rotation functions need to be optimised for best performance on AVR.
* The most efficient rotations are where the number of bits is 1 or a
* multiple of 8, so we compose the efficient rotations to produce all
* other rotation counts of interest. */
#if defined(__AVR__)
#define LW_CRYPTO_ROTATE32_COMPOSED 1
#else
#define LW_CRYPTO_ROTATE32_COMPOSED 0
#endif
/* Rotation macros for 32-bit arguments */ /* Rotation macros for 32-bit arguments */
/* Generic left rotate */ /* Generic left rotate */
...@@ -254,6 +265,8 @@ ...@@ -254,6 +265,8 @@
(_temp >> (bits)) | (_temp << (32 - (bits))); \ (_temp >> (bits)) | (_temp << (32 - (bits))); \
})) }))
#if !LW_CRYPTO_ROTATE32_COMPOSED
/* Left rotate by a specific number of bits. These macros may be replaced /* Left rotate by a specific number of bits. These macros may be replaced
* with more efficient ones on platforms that lack a barrel shifter */ * with more efficient ones on platforms that lack a barrel shifter */
#define leftRotate1(a) (leftRotate((a), 1)) #define leftRotate1(a) (leftRotate((a), 1))
...@@ -322,6 +335,138 @@ ...@@ -322,6 +335,138 @@
#define rightRotate30(a) (rightRotate((a), 30)) #define rightRotate30(a) (rightRotate((a), 30))
#define rightRotate31(a) (rightRotate((a), 31)) #define rightRotate31(a) (rightRotate((a), 31))
#else /* LW_CRYPTO_ROTATE32_COMPOSED */
/* Composed rotation macros where 1 and 8 are fast, but others are slow */
/* Left rotate by 1 */
#define leftRotate1(a) (leftRotate((a), 1))
/* Left rotate by 2 */
#define leftRotate2(a) (leftRotate(leftRotate((a), 1), 1))
/* Left rotate by 3 */
#define leftRotate3(a) (leftRotate(leftRotate(leftRotate((a), 1), 1), 1))
/* Left rotate by 4 */
#define leftRotate4(a) (leftRotate(leftRotate(leftRotate(leftRotate((a), 1), 1), 1), 1))
/* Left rotate by 5: Rotate left by 8, then right by 3 */
#define leftRotate5(a) (rightRotate(rightRotate(rightRotate(leftRotate((a), 8), 1), 1), 1))
/* Left rotate by 6: Rotate left by 8, then right by 2 */
#define leftRotate6(a) (rightRotate(rightRotate(leftRotate((a), 8), 1), 1))
/* Left rotate by 7: Rotate left by 8, then right by 1 */
#define leftRotate7(a) (rightRotate(leftRotate((a), 8), 1))
/* Left rotate by 8 */
#define leftRotate8(a) (leftRotate((a), 8))
/* Left rotate by 9: Rotate left by 8, then left by 1 */
#define leftRotate9(a) (leftRotate(leftRotate((a), 8), 1))
/* Left rotate by 10: Rotate left by 8, then left by 2 */
#define leftRotate10(a) (leftRotate(leftRotate(leftRotate((a), 8), 1), 1))
/* Left rotate by 11: Rotate left by 8, then left by 3 */
#define leftRotate11(a) (leftRotate(leftRotate(leftRotate(leftRotate((a), 8), 1), 1), 1))
/* Left rotate by 12: Rotate left by 16, then right by 4 */
#define leftRotate12(a) (rightRotate(rightRotate(rightRotate(rightRotate(leftRotate((a), 16), 1), 1), 1), 1))
/* Left rotate by 13: Rotate left by 16, then right by 3 */
#define leftRotate13(a) (rightRotate(rightRotate(rightRotate(leftRotate((a), 16), 1), 1), 1))
/* Left rotate by 14: Rotate left by 16, then right by 2 */
#define leftRotate14(a) (rightRotate(rightRotate(leftRotate((a), 16), 1), 1))
/* Left rotate by 15: Rotate left by 16, then right by 1 */
#define leftRotate15(a) (rightRotate(leftRotate((a), 16), 1))
/* Left rotate by 16 */
#define leftRotate16(a) (leftRotate((a), 16))
/* Left rotate by 17: Rotate left by 16, then left by 1 */
#define leftRotate17(a) (leftRotate(leftRotate((a), 16), 1))
/* Left rotate by 18: Rotate left by 16, then left by 2 */
#define leftRotate18(a) (leftRotate(leftRotate(leftRotate((a), 16), 1), 1))
/* Left rotate by 19: Rotate left by 16, then left by 3 */
#define leftRotate19(a) (leftRotate(leftRotate(leftRotate(leftRotate((a), 16), 1), 1), 1))
/* Left rotate by 20: Rotate left by 16, then left by 4 */
#define leftRotate20(a) (leftRotate(leftRotate(leftRotate(leftRotate(leftRotate((a), 16), 1), 1), 1), 1))
/* Left rotate by 21: Rotate left by 24, then right by 3 */
#define leftRotate21(a) (rightRotate(rightRotate(rightRotate(leftRotate((a), 24), 1), 1), 1))
/* Left rotate by 22: Rotate left by 24, then right by 2 */
#define leftRotate22(a) (rightRotate(rightRotate(leftRotate((a), 24), 1), 1))
/* Left rotate by 23: Rotate left by 24, then right by 1 */
#define leftRotate23(a) (rightRotate(leftRotate((a), 24), 1))
/* Left rotate by 24 */
#define leftRotate24(a) (leftRotate((a), 24))
/* Left rotate by 25: Rotate left by 24, then left by 1 */
#define leftRotate25(a) (leftRotate(leftRotate((a), 24), 1))
/* Left rotate by 26: Rotate left by 24, then left by 2 */
#define leftRotate26(a) (leftRotate(leftRotate(leftRotate((a), 24), 1), 1))
/* Left rotate by 27: Rotate left by 24, then left by 3 */
#define leftRotate27(a) (leftRotate(leftRotate(leftRotate(leftRotate((a), 24), 1), 1), 1))
/* Left rotate by 28: Rotate right by 4 */
#define leftRotate28(a) (rightRotate(rightRotate(rightRotate(rightRotate((a), 1), 1), 1), 1))
/* Left rotate by 29: Rotate right by 3 */
#define leftRotate29(a) (rightRotate(rightRotate(rightRotate((a), 1), 1), 1))
/* Left rotate by 30: Rotate right by 2 */
#define leftRotate30(a) (rightRotate(rightRotate((a), 1), 1))
/* Left rotate by 31: Rotate right by 1 */
#define leftRotate31(a) (rightRotate((a), 1))
/* Define the 32-bit right rotations in terms of left rotations */
#define rightRotate1(a) (leftRotate31((a)))
#define rightRotate2(a) (leftRotate30((a)))
#define rightRotate3(a) (leftRotate29((a)))
#define rightRotate4(a) (leftRotate28((a)))
#define rightRotate5(a) (leftRotate27((a)))
#define rightRotate6(a) (leftRotate26((a)))
#define rightRotate7(a) (leftRotate25((a)))
#define rightRotate8(a) (leftRotate24((a)))
#define rightRotate9(a) (leftRotate23((a)))
#define rightRotate10(a) (leftRotate22((a)))
#define rightRotate11(a) (leftRotate21((a)))
#define rightRotate12(a) (leftRotate20((a)))
#define rightRotate13(a) (leftRotate19((a)))
#define rightRotate14(a) (leftRotate18((a)))
#define rightRotate15(a) (leftRotate17((a)))
#define rightRotate16(a) (leftRotate16((a)))
#define rightRotate17(a) (leftRotate15((a)))
#define rightRotate18(a) (leftRotate14((a)))
#define rightRotate19(a) (leftRotate13((a)))
#define rightRotate20(a) (leftRotate12((a)))
#define rightRotate21(a) (leftRotate11((a)))
#define rightRotate22(a) (leftRotate10((a)))
#define rightRotate23(a) (leftRotate9((a)))
#define rightRotate24(a) (leftRotate8((a)))
#define rightRotate25(a) (leftRotate7((a)))
#define rightRotate26(a) (leftRotate6((a)))
#define rightRotate27(a) (leftRotate5((a)))
#define rightRotate28(a) (leftRotate4((a)))
#define rightRotate29(a) (leftRotate3((a)))
#define rightRotate30(a) (leftRotate2((a)))
#define rightRotate31(a) (leftRotate1((a)))
#endif /* LW_CRYPTO_ROTATE32_COMPOSED */
/* Rotation macros for 64-bit arguments */ /* Rotation macros for 64-bit arguments */
/* Generic left rotate */ /* Generic left rotate */
......
...@@ -9,7 +9,7 @@ int crypto_aead_encrypt ...@@ -9,7 +9,7 @@ int crypto_aead_encrypt
const unsigned char *npub, const unsigned char *npub,
const unsigned char *k) const unsigned char *k)
{ {
return hyena_aead_encrypt return hyena_v2_aead_encrypt
(c, clen, m, mlen, ad, adlen, nsec, npub, k); (c, clen, m, mlen, ad, adlen, nsec, npub, k);
} }
...@@ -21,6 +21,6 @@ int crypto_aead_decrypt ...@@ -21,6 +21,6 @@ int crypto_aead_decrypt
const unsigned char *npub, const unsigned char *npub,
const unsigned char *k) const unsigned char *k)
{ {
return hyena_aead_decrypt return hyena_v2_aead_decrypt
(m, mlen, nsec, c, clen, ad, adlen, npub, k); (m, mlen, nsec, c, clen, ad, adlen, npub, k);
} }
...@@ -25,14 +25,24 @@ ...@@ -25,14 +25,24 @@
#include "internal-util.h" #include "internal-util.h"
#include <string.h> #include <string.h>
aead_cipher_t const hyena_cipher = { aead_cipher_t const hyena_v1_cipher = {
"HYENA", "HYENA-v1",
HYENA_KEY_SIZE, HYENA_KEY_SIZE,
HYENA_NONCE_SIZE, HYENA_NONCE_SIZE,
HYENA_TAG_SIZE, HYENA_TAG_SIZE,
AEAD_FLAG_LITTLE_ENDIAN, AEAD_FLAG_LITTLE_ENDIAN,
hyena_aead_encrypt, hyena_v1_aead_encrypt,
hyena_aead_decrypt hyena_v1_aead_decrypt
};
aead_cipher_t const hyena_v2_cipher = {
"HYENA-v2",
HYENA_KEY_SIZE,
HYENA_NONCE_SIZE,
HYENA_TAG_SIZE,
AEAD_FLAG_LITTLE_ENDIAN,
hyena_v2_aead_encrypt,
hyena_v2_aead_decrypt
}; };
/** /**
...@@ -69,7 +79,236 @@ static void hyena_triple_delta(unsigned char D[8]) ...@@ -69,7 +79,236 @@ static void hyena_triple_delta(unsigned char D[8])
} }
/** /**
* \brief Process the associated data for HYENA. * \brief Process the associated data for HYENA-v1.
*
* \param ks Key schedule for the GIFT-128 cipher.
* \param Y Internal hash state of HYENA.
* \param D Internal hash state of HYENA.
* \param ad Points to the associated data.
* \param adlen Length of the associated data in bytes.
*/
static void hyena_v1_process_ad
    (const gift128n_key_schedule_t *ks, unsigned char Y[16],
     unsigned char D[8], const unsigned char *ad,
     unsigned long long adlen)
{
    unsigned char block[16];
    unsigned tail;

    /* The delta is doubled once before any associated data is absorbed */
    hyena_double_delta(D);

    /* Absorb every full block that still has more data after it;
     * each such block is followed by a cipher call and a doubling */
    for (; adlen > 16; ad += 16, adlen -= 16) {
        memcpy(block, ad, 16);
        lw_xor_block(block + 8, Y + 8, 8);
        lw_xor_block(block + 8, D, 8);
        lw_xor_block(Y, block, 16);
        gift128n_encrypt(ks, Y, Y);
        hyena_double_delta(D);
    }

    if (adlen == 16) {
        /* Final block is complete: one extra doubling and no padding */
        hyena_double_delta(D);
        memcpy(block, ad, 16);
        lw_xor_block(block + 8, Y + 8, 8);
        lw_xor_block(block + 8, D, 8);
        lw_xor_block(Y, block, 16);
        return;
    }

    /* Final block is partial (possibly empty): two extra doublings,
     * then pad with a 0x01 byte followed by zeroes */
    tail = (unsigned)adlen;
    hyena_double_delta(D);
    hyena_double_delta(D);
    memcpy(block, ad, tail);
    block[tail] = 0x01;
    memset(block + tail + 1, 0, 15 - tail);
    /* Only the data bytes that reach into the upper half get Y mixed in */
    if (tail > 8)
        lw_xor_block(block + 8, Y + 8, tail - 8);
    lw_xor_block(block + 8, D, 8);
    lw_xor_block(Y, block, 16);
}
int hyena_v1_aead_encrypt
    (unsigned char *c, unsigned long long *clen,
     const unsigned char *m, unsigned long long mlen,
     const unsigned char *ad, unsigned long long adlen,
     const unsigned char *nsec,
     const unsigned char *npub,
     const unsigned char *k)
{
    gift128n_key_schedule_t ks;
    unsigned char Y[16];
    unsigned char D[8];
    unsigned char feedback[16];
    unsigned char half[8];
    unsigned char domain = 0;
    (void)nsec;

    /* The output is the ciphertext followed by the tag */
    *clen = mlen + HYENA_TAG_SIZE;

    /* Build the first block: a flag byte, three zero bytes and the nonce.
     * Bit 0 of the flag marks empty AD, bit 1 marks empty AD and message */
    gift128n_init(&ks, k);
    if (adlen == 0) {
        domain |= 0x01;
        if (mlen == 0)
            domain |= 0x02;
    }
    Y[0] = domain;
    memset(Y + 1, 0, 3);
    memcpy(Y + 4, npub, HYENA_NONCE_SIZE);
    gift128n_encrypt(&ks, Y, Y);

    /* The initial delta is the upper half of the encrypted first block */
    memcpy(D, Y + 8, 8);

    /* Absorb the associated data */
    hyena_v1_process_ad(&ks, Y, D, ad, adlen);

    /* Encrypt the plaintext to produce the ciphertext */
    if (mlen != 0) {
        /* Full blocks that are followed by more plaintext */
        for (; mlen > 16; c += 16, m += 16, mlen -= 16) {
            gift128n_encrypt(&ks, Y, Y);
            hyena_double_delta(D);
            memcpy(feedback, m, 16);
            lw_xor_block(feedback + 8, Y + 8, 8);
            lw_xor_block(feedback + 8, D, 8);
            lw_xor_block_2_src(c, m, Y, 16);
            lw_xor_block(Y, feedback, 16);
        }

        /* Last block: two doublings when complete, three when partial */
        gift128n_encrypt(&ks, Y, Y);
        hyena_double_delta(D);
        hyena_double_delta(D);
        if (mlen == 16) {
            memcpy(feedback, m, 16);
            lw_xor_block(feedback + 8, Y + 8, 8);
            lw_xor_block(feedback + 8, D, 8);
            lw_xor_block_2_src(c, m, Y, 16);
            lw_xor_block(Y, feedback, 16);
            c += 16;
        } else {
            unsigned partial = (unsigned)mlen;
            hyena_double_delta(D);
            /* Pad the partial block with 0x01 followed by zeroes */
            memcpy(feedback, m, partial);
            feedback[partial] = 0x01;
            memset(feedback + partial + 1, 0, 15 - partial);
            if (partial > 8)
                lw_xor_block(feedback + 8, Y + 8, partial - 8);
            lw_xor_block(feedback + 8, D, 8);
            lw_xor_block_2_src(c, m, Y, partial);
            lw_xor_block(Y, feedback, 16);
            c += partial;
        }
    }

    /* Swap the two halves of Y, then encrypt it straight into the
     * tag position that follows the ciphertext */
    memcpy(half, Y, 8);
    memcpy(Y, Y + 8, 8);
    memcpy(Y + 8, half, 8);
    gift128n_encrypt(&ks, c, Y);
    return 0;
}
int hyena_v1_aead_decrypt
    (unsigned char *m, unsigned long long *mlen,
     unsigned char *nsec,
     const unsigned char *c, unsigned long long clen,
     const unsigned char *ad, unsigned long long adlen,
     const unsigned char *npub,
     const unsigned char *k)
{
    gift128n_key_schedule_t ks;
    unsigned char Y[16];
    unsigned char D[8];
    unsigned char feedback[16];
    unsigned char half[8];
    unsigned char domain = 0;
    unsigned char *const plaintext = m;
    (void)nsec;

    /* The input must at least hold the tag; the rest is ciphertext */
    if (clen < HYENA_TAG_SIZE)
        return -1;
    *mlen = clen - HYENA_TAG_SIZE;

    /* Build the first block: a flag byte, three zero bytes and the nonce.
     * Bit 0 of the flag marks empty AD, bit 1 marks empty AD and message */
    gift128n_init(&ks, k);
    if (adlen == 0) {
        domain |= 0x01;
        if (clen == HYENA_TAG_SIZE)
            domain |= 0x02;
    }
    Y[0] = domain;
    memset(Y + 1, 0, 3);
    memcpy(Y + 4, npub, HYENA_NONCE_SIZE);
    gift128n_encrypt(&ks, Y, Y);

    /* The initial delta is the upper half of the encrypted first block */
    memcpy(D, Y + 8, 8);

    /* Absorb the associated data */
    hyena_v1_process_ad(&ks, Y, D, ad, adlen);

    /* Decrypt the ciphertext to produce the plaintext */
    clen -= HYENA_TAG_SIZE;
    if (clen != 0) {
        /* Full blocks that are followed by more ciphertext.  The upper
         * ciphertext half is saved before m is written so that the
         * statement order of the original is preserved exactly */
        for (; clen > 16; c += 16, m += 16, clen -= 16) {
            gift128n_encrypt(&ks, Y, Y);
            hyena_double_delta(D);
            memcpy(feedback + 8, c + 8, 8);
            lw_xor_block_2_src(m, c, Y, 16);
            memcpy(feedback, m, 8);
            lw_xor_block(feedback + 8, D, 8);
            lw_xor_block(Y, feedback, 16);
        }

        /* Last block: two doublings when complete, three when partial */
        gift128n_encrypt(&ks, Y, Y);
        hyena_double_delta(D);
        hyena_double_delta(D);
        if (clen == 16) {
            memcpy(feedback + 8, c + 8, 8);
            lw_xor_block_2_src(m, c, Y, 16);
            memcpy(feedback, m, 8);
            lw_xor_block(feedback + 8, D, 8);
            lw_xor_block(Y, feedback, 16);
            c += 16;
        } else {
            unsigned partial = (unsigned)clen;
            hyena_double_delta(D);
            /* Upper-half feedback comes from the ciphertext, lower-half
             * feedback from the recovered plaintext */
            if (partial > 8)
                memcpy(feedback + 8, c + 8, partial - 8);
            lw_xor_block_2_src(m, c, Y, partial);
            memcpy(feedback, m, partial > 8 ? 8 : partial);
            /* Pad the partial block with 0x01 followed by zeroes */
            feedback[partial] = 0x01;
            memset(feedback + partial + 1, 0, 15 - partial);
            lw_xor_block(feedback + 8, D, 8);
            lw_xor_block(Y, feedback, 16);
            c += partial;
        }
    }

    /* Swap the two halves of Y, encrypt it, and compare the result with
     * the tag that follows the ciphertext */
    memcpy(half, Y, 8);
    memcpy(Y, Y + 8, 8);
    memcpy(Y + 8, half, 8);
    gift128n_encrypt(&ks, Y, Y);
    return aead_check_tag(plaintext, *mlen, Y, c, HYENA_TAG_SIZE);
}
/**
* \brief Process the associated data for HYENA-v2.
* *
* \param ks Key schedule for the GIFT-128 cipher. * \param ks Key schedule for the GIFT-128 cipher.
* \param Y Internal hash state of HYENA. * \param Y Internal hash state of HYENA.
...@@ -77,7 +316,7 @@ static void hyena_triple_delta(unsigned char D[8]) ...@@ -77,7 +316,7 @@ static void hyena_triple_delta(unsigned char D[8])
* \param ad Points to the associated data. * \param ad Points to the associated data.
* \param adlen Length of the associated data in bytes. * \param adlen Length of the associated data in bytes.
*/ */
static void hyena_process_ad static void hyena_v2_process_ad
(const gift128n_key_schedule_t *ks, unsigned char Y[16], (const gift128n_key_schedule_t *ks, unsigned char Y[16],
unsigned char D[8], const unsigned char *ad, unsigned char D[8], const unsigned char *ad,
unsigned long long adlen) unsigned long long adlen)
...@@ -113,7 +352,7 @@ static void hyena_process_ad ...@@ -113,7 +352,7 @@ static void hyena_process_ad
} }
} }
int hyena_aead_encrypt int hyena_v2_aead_encrypt
(unsigned char *c, unsigned long long *clen, (unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen, const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen, const unsigned char *ad, unsigned long long adlen,
...@@ -146,7 +385,7 @@ int hyena_aead_encrypt ...@@ -146,7 +385,7 @@ int hyena_aead_encrypt
memcpy(D, Y + 8, 8); memcpy(D, Y + 8, 8);
/* Process the associated data */ /* Process the associated data */
hyena_process_ad(&ks, Y, D, ad, adlen); hyena_v2_process_ad(&ks, Y, D, ad, adlen);
/* Encrypt the plaintext to produce the ciphertext */ /* Encrypt the plaintext to produce the ciphertext */
if (mlen > 0) { if (mlen > 0) {
...@@ -198,7 +437,7 @@ int hyena_aead_encrypt ...@@ -198,7 +437,7 @@ int hyena_aead_encrypt
return 0; return 0;
} }
int hyena_aead_decrypt int hyena_v2_aead_decrypt
(unsigned char *m, unsigned long long *mlen, (unsigned char *m, unsigned long long *mlen,
unsigned char *nsec, unsigned char *nsec,
const unsigned char *c, unsigned long long clen, const unsigned char *c, unsigned long long clen,
...@@ -234,7 +473,7 @@ int hyena_aead_decrypt ...@@ -234,7 +473,7 @@ int hyena_aead_decrypt
memcpy(D, Y + 8, 8); memcpy(D, Y + 8, 8);
/* Process the associated data */ /* Process the associated data */
hyena_process_ad(&ks, Y, D, ad, adlen); hyena_v2_process_ad(&ks, Y, D, ad, adlen);
/* Decrypt the ciphertext to produce the plaintext */ /* Decrypt the ciphertext to produce the plaintext */
clen -= HYENA_TAG_SIZE; clen -= HYENA_TAG_SIZE;
......
...@@ -33,6 +33,12 @@ ...@@ -33,6 +33,12 @@
* GIFT-128 block cipher. The algorithm has a 128-bit key, a 96-bit nonce, * GIFT-128 block cipher. The algorithm has a 128-bit key, a 96-bit nonce,
* and a 128-bit authentication tag. * and a 128-bit authentication tag.
* *
* This library implements both the v1 and v2 versions of HYENA from the
* authors. The v1 version was submitted to the second round of the
* NIST Lightweight Cryptography Competition but was later found to have a
* forgery attack. The authors fixed this with v2 but it was too late to
* submit the update for the second round.
*
* References: https://www.isical.ac.in/~lightweight/hyena/ * References: https://www.isical.ac.in/~lightweight/hyena/
*/ */
...@@ -56,12 +62,76 @@ extern "C" { ...@@ -56,12 +62,76 @@ extern "C" {
#define HYENA_NONCE_SIZE 12 #define HYENA_NONCE_SIZE 12
/** /**
* \brief Meta-information block for the HYENA cipher. * \brief Meta-information block for the HYENA-v1 cipher.
*/
extern aead_cipher_t const hyena_v1_cipher;
/**
* \brief Meta-information block for the HYENA-v2 cipher.
*/
extern aead_cipher_t const hyena_v2_cipher;
/**
* \brief Encrypts and authenticates a packet with HYENA-v1.
*
* \param c Buffer to receive the output.
* \param clen On exit, set to the length of the output which includes
* the ciphertext and the 16 byte authentication tag.
* \param m Buffer that contains the plaintext message to encrypt.
* \param mlen Length of the plaintext message in bytes.
* \param ad Buffer that contains associated data to authenticate
* along with the packet but which does not need to be encrypted.
* \param adlen Length of the associated data in bytes.
* \param nsec Secret nonce - not used by this algorithm.
* \param npub Points to the public nonce for the packet which must
* be 12 bytes in length.
* \param k Points to the 16 bytes of the key to use to encrypt the packet.
*
* \return 0 on success, or a negative value if there was an error in
* the parameters.
*
* \sa hyena_v1_aead_decrypt()
*/ */
extern aead_cipher_t const hyena_cipher; int hyena_v1_aead_encrypt
(unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k);
/**
* \brief Decrypts and authenticates a packet with HYENA-v1.
*
* \param m Buffer to receive the plaintext message on output.
* \param mlen Receives the length of the plaintext message on output.
* \param nsec Secret nonce - not used by this algorithm.
* \param c Buffer that contains the ciphertext and authentication
* tag to decrypt.
* \param clen Length of the input data in bytes, which includes the
* ciphertext and the 16 byte authentication tag.
* \param ad Buffer that contains associated data to authenticate
* along with the packet but which does not need to be encrypted.
* \param adlen Length of the associated data in bytes.
* \param npub Points to the public nonce for the packet which must
* be 12 bytes in length.
* \param k Points to the 16 bytes of the key to use to decrypt the packet.
*
* \return 0 on success, -1 if the authentication tag was incorrect,
* or some other negative number if there was an error in the parameters.
*
* \sa hyena_v1_aead_encrypt()
*/
int hyena_v1_aead_decrypt
(unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k);
/** /**
* \brief Encrypts and authenticates a packet with HYENA. * \brief Encrypts and authenticates a packet with HYENA-v2.
* *
* \param c Buffer to receive the output. * \param c Buffer to receive the output.
* \param clen On exit, set to the length of the output which includes * \param clen On exit, set to the length of the output which includes
...@@ -81,7 +151,7 @@ extern aead_cipher_t const hyena_cipher; ...@@ -81,7 +151,7 @@ extern aead_cipher_t const hyena_cipher;
* *
* \sa hyena_aead_decrypt() * \sa hyena_aead_decrypt()
*/ */
int hyena_aead_encrypt int hyena_v2_aead_encrypt
(unsigned char *c, unsigned long long *clen, (unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen, const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen, const unsigned char *ad, unsigned long long adlen,
...@@ -90,7 +160,7 @@ int hyena_aead_encrypt ...@@ -90,7 +160,7 @@ int hyena_aead_encrypt
const unsigned char *k); const unsigned char *k);
/** /**
* \brief Decrypts and authenticates a packet with HYENA. * \brief Decrypts and authenticates a packet with HYENA-v2.
* *
* \param m Buffer to receive the plaintext message on output. * \param m Buffer to receive the plaintext message on output.
* \param mlen Receives the length of the plaintext message on output. * \param mlen Receives the length of the plaintext message on output.
...@@ -111,7 +181,7 @@ int hyena_aead_encrypt ...@@ -111,7 +181,7 @@ int hyena_aead_encrypt
* *
* \sa hyena_aead_encrypt() * \sa hyena_aead_encrypt()
*/ */
int hyena_aead_decrypt int hyena_v2_aead_decrypt
(unsigned char *m, unsigned long long *mlen, (unsigned char *m, unsigned long long *mlen,
unsigned char *nsec, unsigned char *nsec,
const unsigned char *c, unsigned long long clen, const unsigned char *c, unsigned long long clen,
......
#define CRYPTO_KEYBYTES 16 //
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#include"crypto_aead.h"
#include"api.h"
#include <string.h>
#define U32BIG(x) (x)
#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0])))
#define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n))))
/*
 * Boolean network applied bitwise to whole words.
 *
 * Reads a, b, c and d; overwrites a with one output and stores the other
 * three outputs in f, g and h.  b, c and d are not modified.
 *
 * The macro uses the scratch variables t1, t2, t3, t5, t6, t8, t9 and t11,
 * which must be declared by the caller.  Arguments are expanded without
 * parentheses and a, b, c, d are evaluated several times, so pass plain
 * lvalues only.
 *
 * NOTE(review): presumably the cipher's 4-bit S-box in bit-sliced form,
 * one S-box per bit column - confirm against the specification.
 */
#define sbox(a, b, c, d, f, g, h) \
{ \
t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; a = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \
}
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned long long u64;
/*
 * Converts 8 input bytes into the two-word bit-separated state format.
 *
 * in  - points to at least 8 readable bytes; no alignment is required.
 * out - array of at least two u32 that receives the result.
 *
 * Each of the two 32-bit words loaded from `in` goes through four
 * swap-and-mask steps that gather its even-numbered bits in the low 16
 * bits and its odd-numbered bits in the high 16 bits.  out[1] then
 * collects the even-bit halves of both words and out[0] the odd-bit halves.
 *
 * The words are loaded with memcpy rather than through a casted pointer,
 * so the macro is safe for unaligned byte buffers and does not break the
 * aliasing rules.  Both arguments are parenthesized so that pointer
 * expressions such as `buf + 8` behave as expected.
 *
 * Uses the scratch variables t1, t2, t3 and t5 (u32) from the caller.
 */
#define packFormat(out,in) {\
    memcpy(&t1, (const unsigned char *)(in), sizeof(t1)); \
    memcpy(&t2, (const unsigned char *)(in) + sizeof(t1), sizeof(t2)); \
    t1 = U32BIG(t1); \
    t2 = U32BIG(t2); \
    t3 = (t1 ^ (t1 >> 1)) & 0x22222222, t1 ^= t3 ^ (t3 << 1); \
    t3 = (t1 ^ (t1 >> 2)) & 0x0C0C0C0C, t1 ^= t3 ^ (t3 << 2); \
    t3 = (t1 ^ (t1 >> 4)) & 0x00F000F0, t1 ^= t3 ^ (t3 << 4); \
    t3 = (t1 ^ (t1 >> 8)) & 0x0000FF00, t1 ^= t3 ^ (t3 << 8); \
    t5 = (t2 ^ (t2 >> 1)) & 0x22222222, t2 ^= t5 ^ (t5 << 1); \
    t5 = (t2 ^ (t2 >> 2)) & 0x0C0C0C0C, t2 ^= t5 ^ (t5 << 2); \
    t5 = (t2 ^ (t2 >> 4)) & 0x00F000F0, t2 ^= t5 ^ (t5 << 4); \
    t5 = (t2 ^ (t2 >> 8)) & 0x0000FF00, t2 ^= t5 ^ (t5 << 8); \
    (out)[0] = (t2 & 0xFFFF0000) | (t1 >> 16); \
    (out)[1] = (t2 << 16) | (t1 & 0x0000FFFF); \
}
/*
 * Converts two words of bit-separated state back into 8 output bytes;
 * this undoes packFormat.
 *
 * in  - array of at least two u32 in the packed format.
 * out - points to at least 8 writable bytes; no alignment is required.
 *
 * The two halves of in[0]/in[1] are regrouped into one word each and the
 * four swap-and-mask steps of packFormat are applied in reverse order.
 * The resulting 64-bit value (t2 in the upper 32 bits, t1 in the lower)
 * is stored with memcpy rather than through a casted pointer, so the
 * macro is safe for unaligned byte buffers and does not break the
 * aliasing rules.
 *
 * Uses the scratch variables t1, t2, t3 and t5 (u32) from the caller.
 */
#define unpackFormat(out, in) {\
    t2 = ((in)[0] & 0xFFFF0000) | ((in)[1] >> 16); \
    t1 = ((in)[1] & 0x0000FFFF) | ((in)[0] << 16); \
    t3 = (t1 ^ (t1 >> 8)) & 0x0000FF00, t1 ^= t3 ^ (t3 << 8); \
    t3 = (t1 ^ (t1 >> 4)) & 0x00F000F0, t1 ^= t3 ^ (t3 << 4); \
    t3 = (t1 ^ (t1 >> 2)) & 0x0C0C0C0C, t1 ^= t3 ^ (t3 << 2); \
    t3 = (t1 ^ (t1 >> 1)) & 0x22222222, t1 ^= t3 ^ (t3 << 1); \
    t5 = (t2 ^ (t2 >> 8)) & 0x0000FF00, t2 ^= t5 ^ (t5 << 8); \
    t5 = (t2 ^ (t2 >> 4)) & 0x00F000F0, t2 ^= t5 ^ (t5 << 4); \
    t5 = (t2 ^ (t2 >> 2)) & 0x0C0C0C0C, t2 ^= t5 ^ (t5 << 2); \
    t5 = (t2 ^ (t2 >> 1)) & 0x22222222, t2 ^= t5 ^ (t5 << 1); \
    { \
        u64 unpacked64_ = ((u64)t2 << 32 | t1); \
        memcpy((out), &unpacked64_, sizeof(unpacked64_)); \
    } \
}
/*
 * Loads one 32-bit word from `in`, applies the same four swap-and-mask
 * steps that packFormat applies to each of its words, and stores the
 * result in *out.  After the steps the even-numbered bits of the word sit
 * in the low 16 bits and the odd-numbered bits in the high 16 bits.
 *
 * in  - points to at least 4 readable bytes; no alignment is required.
 * out - points to the u32 that receives the result.
 *
 * The word is loaded with memcpy instead of a casted pointer, and the
 * stray `t1,` expression of the earlier version (which read t1 before it
 * was assigned) has been dropped.
 *
 * Uses the scratch variables t1 and t2 (u32) from the caller.
 */
#define getU32Format(out, in) {\
    memcpy(&t2, (const unsigned char *)(in), sizeof(t2)); \
    t2 = U32BIG(t2); \
    t1 = (t2 ^ (t2 >> 1)) & 0x22222222, t2 ^= t1 ^ (t1 << 1); \
    t1 = (t2 ^ (t2 >> 2)) & 0x0C0C0C0C, t2 ^= t1 ^ (t1 << 2); \
    t1 = (t2 ^ (t2 >> 4)) & 0x00F000F0, t2 ^= t1 ^ (t1 << 4); \
    t1 = (t2 ^ (t2 >> 8)) & 0x0000FF00, t2 ^= t1 ^ (t1 << 8); \
    *(out) = t2; \
}
/*
 * One round of the permutation on the eight-word state `s`.
 *
 * constant6Format - table of round constants (one byte per round).
 * lunNum          - index of the round constant to use.
 *
 * Steps, in order:
 *  1. The high nibble of the round constant is XORed into s[0] and the
 *     low nibble into s[1].
 *  2. sbox() is applied to the even words (s[0], s[2], s[4], s[6]) and to
 *     the odd words (s[1], s[3], s[5], s[7]).  s[0] and s[1] are updated
 *     in place; the remaining outputs go to s_temp[], except the first
 *     output of the second call, which goes directly to s[2].
 *  3. The outputs held in s_temp[] are rotated left by 1, 4, 4, 12 and 13
 *     bits and written to s[3] .. s[7]; note that s[6] takes s_temp[7]
 *     and s[7] takes s_temp[6].
 *
 * The macro operates on variables named `s` and `s_temp` in the caller's
 * scope, plus the scratch variables required by sbox().
 */
#define ROUND256( constant6Format,lunNum) {\
s[0] ^= constant6Format[lunNum]>> 4;\
s[1] ^= constant6Format[lunNum]& 0x0f;\
sbox(s[0], s[2], s[4], s[6], s_temp[2], s_temp[4], s_temp[6]);\
sbox(s[1], s[3], s[5], s[7], s[2], s_temp[5], s_temp[7]);\
s[3] = LOTR32(s_temp[2], 1);\
s[4] = LOTR32(s_temp[4], 4);\
s[5] = LOTR32(s_temp[5], 4);\
s[6] = LOTR32(s_temp[7], 12);\
s[7] = LOTR32(s_temp[6], 13);\
}
/*
 * Encrypts and authenticates a message.
 *
 * c     - output buffer; receives mlen bytes of ciphertext followed by a
 *         CRYPTO_ABYTES-byte tag.
 * clen  - set to mlen + CRYPTO_ABYTES.
 * m     - plaintext, mlen bytes.
 * ad    - associated data, adlen bytes (authenticated, not encrypted).
 * nsec  - not used by the implementation in this file.
 * npub  - public nonce; 16 bytes are read.
 * k     - key; 16 bytes are read.
 *
 * Returns 0.
 */
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
);
/*
 * Decrypts a message and verifies its authentication tag.
 *
 * m     - output buffer for the recovered plaintext.
 * mlen  - receives the plaintext length (clen - CRYPTO_ABYTES).
 * nsec  - not used by the implementation in this file.
 * c     - ciphertext followed by a CRYPTO_ABYTES-byte tag, clen bytes total.
 * ad    - associated data, adlen bytes.
 * npub  - public nonce; 16 bytes are read.
 * k     - key; 16 bytes are read.
 *
 * Returns 0 when the tag matches, or -1 when clen is shorter than the tag
 * or the tag does not match.
 */
int crypto_aead_decrypt(
unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
);
#include"auxFormat.h"
#define RATE (64 / 8)
#define PR0_ROUNDS 52
#define PR_ROUNDS 28
#define PRF_ROUNDS 32
/*
 * Round constants (constant6_aead_128v1), one byte per round.
 * ROUND256 XORs the high nibble of an entry into s[0] and the low nibble
 * into s[1].
 */
unsigned char constant6Format[63] = {
    0x01, 0x10, 0x02, 0x20, 0x04, 0x41, 0x11, 0x12,
    0x22, 0x24, 0x45, 0x50, 0x03, 0x30, 0x06, 0x61,
    0x15, 0x53, 0x33, 0x36, 0x67, 0x74, 0x46, 0x60,
    0x05, 0x51, 0x13, 0x32, 0x26, 0x65, 0x54, 0x42,
    0x21, 0x14, 0x43, 0x31, 0x16, 0x63, 0x35, 0x57,
    0x72, 0x27, 0x75, 0x56, 0x62, 0x25, 0x55, 0x52,
    0x23, 0x34, 0x47, 0x70, 0x07, 0x71, 0x17, 0x73,
    0x37, 0x77, 0x76, 0x66, 0x64, 0x44, 0x40
};
/*
 * Encrypts `m` and writes ciphertext plus a CRYPTO_ABYTES-byte tag to `c`.
 *
 * The state is loaded from the nonce and key, permuted for PR0_ROUNDS
 * rounds, then associated data and plaintext are absorbed RATE bytes at
 * a time with PR_ROUNDS rounds after each associated-data block and each
 * full plaintext block.  A trailing partial block (possibly empty) is
 * padded with a single 0x01 byte; this padded block is only processed
 * when the corresponding input is non-empty.  The tag is the first 16
 * bytes of the state after PRF_ROUNDS final rounds.
 *
 * *clen is set to mlen + CRYPTO_ABYTES.  nsec is not used.  Returns 0.
 */
int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen,
        const unsigned char *m, unsigned long long mlen,
        const unsigned char *ad, unsigned long long adlen,
        const unsigned char *nsec, const unsigned char *npub,
        const unsigned char *k) {
    /* s, s_temp and t1..t11 are referenced by name from the macros */
    u32 s[8] = { 0 };
    u32 s_temp[8] = { 0 };
    u32 t1, t2, t3, t5, t6, t8, t9, t11;
    u32 block[2] = { 0 };
    u8 tail[8];
    unsigned int round;

    (void)nsec;
    *clen = mlen + CRYPTO_ABYTES;

    /* Initialization: nonce in s[0..3], key in s[4..7] */
    packFormat(s, npub);
    packFormat((s + 2), (npub + 8));
    packFormat((s + 4), k);
    packFormat((s + 6), (k + 8));
    for (round = 0; round < PR0_ROUNDS; round++) {
        ROUND256(constant6Format, round);
    }

    /* Associated data */
    if (adlen != 0) {
        for (; adlen >= RATE; adlen -= RATE, ad += RATE) {
            packFormat(block, ad);
            s[0] ^= block[0];
            s[1] ^= block[1];
            for (round = 0; round < PR_ROUNDS; round++) {
                ROUND256(constant6Format, round);
            }
        }
        /* Remaining 0..RATE-1 bytes, padded with 0x01 then zeroes */
        memset(tail, 0, sizeof(tail));
        memcpy(tail, ad, adlen);
        tail[adlen] = 0x01;
        packFormat(block, tail);
        s[0] ^= block[0];
        s[1] ^= block[1];
        for (round = 0; round < PR_ROUNDS; round++) {
            ROUND256(constant6Format, round);
        }
    }

    /* Separation between the associated-data and message phases */
    s[6] ^= 0x80000000;

    /* Plaintext */
    if (mlen != 0) {
        for (; mlen >= RATE; mlen -= RATE, m += RATE, c += RATE) {
            packFormat(block, m);
            s[0] ^= block[0];
            s[1] ^= block[1];
            unpackFormat(c, s);
            for (round = 0; round < PR_ROUNDS; round++) {
                ROUND256(constant6Format, round);
            }
        }
        /* Remaining 0..RATE-1 bytes: only mlen ciphertext bytes are
         * emitted and no permutation follows this block */
        memset(tail, 0, sizeof(tail));
        memcpy(tail, m, mlen);
        tail[mlen] = 0x01;
        packFormat(block, tail);
        s[0] ^= block[0];
        s[1] ^= block[1];
        unpackFormat(tail, s);
        memcpy(c, tail, mlen);
        c += mlen;
    }

    /* Finalization */
    for (round = 0; round < PRF_ROUNDS; round++) {
        ROUND256(constant6Format, round);
    }

    /* Tag: s[0..1] then s[2..3], 8 bytes each */
    unpackFormat(tail, s);
    memcpy(c, tail, sizeof(tail));
    unpackFormat(tail, (s + 2));
    memcpy(c + 8, tail, sizeof(tail));
    return 0;
}
int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen,
unsigned char *nsec, const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub, const unsigned char *k) {
u8 i, j;
// initialization
//256/32=8
u32 s[8] = { 0 };
u32 dataFormat[4] = { 0 };
u32 dataFormat_1[2] = { 0 };
u8 tempU8[32] = { 0 };
u8 tempData[8];
u32 s_temp[8] = { 0 };
u32 t1, t2, t3, t5, t6, t8, t9, t11;
*mlen = clen - CRYPTO_ABYTES;
if (clen < CRYPTO_ABYTES)
return -1;
//initialization
packFormat(s, npub);
packFormat((s + 2), (npub + 8));
packFormat((s + 4), k);
packFormat((s + 6), (k + 8));
for (i = 0; i < PR0_ROUNDS; i++) {
ROUND256(constant6Format, i);
}
// process associated data
if (adlen) {
while (adlen >= RATE) {
packFormat(dataFormat, ad);
s[0] ^= dataFormat[0];
s[1] ^= dataFormat[1];
for (i = 0; i < PR_ROUNDS; i++) {
ROUND256(constant6Format, i);
}
adlen -= RATE;
ad += RATE;
}
memset(tempData, 0, sizeof(tempData));
memcpy(tempData, ad, adlen * sizeof(unsigned char));
tempData[adlen] = 0x01;
packFormat(dataFormat, tempData);
s[0] ^= dataFormat[0];
s[1] ^= dataFormat[1];
for (i = 0; i < PR_ROUNDS; i++) {
ROUND256(constant6Format, i);
}
}
s[6] ^= 0x80000000;
// process c
clen = clen - CRYPTO_KEYBYTES;
if (clen) {
while (clen >= RATE) {
packFormat(dataFormat, c);
dataFormat_1[0] = s[0] ^ dataFormat[0];
dataFormat_1[1] = s[1] ^ dataFormat[1];
unpackFormat(m, dataFormat_1);
s[0] = dataFormat[0];
s[1] = dataFormat[1];
for (i = 0; i < PR_ROUNDS; i++) {
ROUND256(constant6Format, i);
}
clen -= RATE;
m += RATE;
c += RATE;
}
unpackFormat(tempU8, s);
for (i = 0; i < clen; ++i, ++m, ++c)
{
*m = tempU8[i]^ *c;
tempU8[i] = *c;
}
tempU8[i] ^= 0x01;
packFormat(s, tempU8);
}
// finalization
for (i = 0; i < PRF_ROUNDS; i++) {
ROUND256(constant6Format, i);
}
// return tag
unpackFormat(tempU8, s);
unpackFormat((tempU8+8), (s+2));
if (memcmp((void*)tempU8, (void*)c,CRYPTO_ABYTES)) {
*mlen = 0;
memset(m, 0, sizeof(unsigned char) * (clen - CRYPTO_ABYTES));
return -1;
}
return 0;
}
/* API parameters (lengths in bytes) for the AEAD implementation that follows. */
/* Key length: the functions below copy 16 bytes from k. */
#define CRYPTO_KEYBYTES 16
/* Secret message number length: 0, the nsec argument is never read below. */
#define CRYPTO_NSECBYTES 0
/* Nonce length: 12 bytes of npub are packed directly, the remaining 4 are
   copied in front of the key. */
#define CRYPTO_NPUBBYTES 16
/* Tag length: the ciphertext is CRYPTO_ABYTES bytes longer than the message. */
#define CRYPTO_ABYTES 16
/* NOTE(review): not referenced by the code below; presumably tells the
   benchmarking framework that input and output must not overlap - confirm. */
#define CRYPTO_NOOVERLAP 1
#include"crypto_aead.h"
#include"api.h"
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
/* Identity mapping: 32-bit words are used exactly as they are loaded from or
   stored to memory, no byte swap is applied. */
#define U32BIG(x) (x)
/* Short aliases for the unsigned integer types used by the macros below.
   NOTE(review): u32 assumes that unsigned int is 32 bits wide - confirm for
   the target platform. */
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned long long u64;
/* Number of elements of a real array (does not work on pointers). */
#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0])))
/* Rotate the 32-bit value x left by n bits. n must be in 1..31: n == 0 would
   shift right by 32, which is undefined in C. */
#define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n))))
/*
 * In-place bit gather on the 32-bit lvalue x.
 * Keeps only the bits at positions 31, 28, 25, ..., 4, 1 (mask 0x92492492)
 * and moves them, in the same order, into the 11 most significant bits
 * (31..21) of x. All remaining bits of the result are zero.
 * x is evaluated several times and assigned to, so it must be a plain lvalue
 * without side effects.
 */
#define puckU32ToThree(x){\
x &= 0x92492492;\
x = (x | (x << 2)) & 0xc30c30c3;\
x = (x | (x << 4)) & 0xf00f00f0;\
x = (x | (x << 8)) & 0xff0000ff;\
x = (x | (x << 16)) & 0xfff00000;\
}
/*
 * In-place bit scatter on the 32-bit lvalue x, the reverse of
 * puckU32ToThree: the 11 most significant bits (31..21) are spread out to
 * bit positions 31, 28, 25, ..., 4, 1. Every other bit of the result is zero
 * and the lower input bits are ignored.
 * x is evaluated several times and assigned to, so it must be a plain lvalue
 * without side effects.
 */
#define unpuckU32ToThree(x){\
x &= 0xfff00000;\
x = (x | (x >> 16)) & 0xff0000ff;\
x = (x | (x >> 8)) & 0xf00f00f0;\
x = (x | (x >> 4)) & 0xc30c30c3;\
x = (x | (x >> 2)) & 0x92492492;\
}
/*
 * Packs the first 32-bit word of the byte buffer `in` into the low bits of
 * out[0], out[1] and out[2].
 *
 * Bits 7 and 6 of in[3] are saved separately (t2_64, t2_65) because the word
 * is shifted left by 2 before being split; they are re-inserted at bit 10 of
 * out[1] and out[2]. Each output word then receives every third bit of the
 * shifted word via puckU32ToThree.
 *
 * Uses the caller's locals t2, t2_64, t2_65 and temp2[3] as scratch.
 * `in` is read through a u32 pointer cast, so it should be suitably aligned;
 * pass compound expressions in parentheses.
 * Not referenced by the AEAD functions further down.
 */
#define packU32FormatToThreePacket( out, in) {\
t2 = U32BIG(((u32*)in)[0]); \
t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6; \
t2 = t2 << 2; \
temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; \
puckU32ToThree(temp2[0]); \
puckU32ToThree(temp2[1]); \
puckU32ToThree(temp2[2]); \
out[0] = (temp2[0] >> 22); \
out[1] = (((u32)t2_64) << 10) | (temp2[1] >> 22); \
out[2] =(((u32)t2_65) << 10) | (temp2[2] >> 22); \
}
/*
 * Packs 12 bytes from `in` (three 32-bit words) into out[0], out[1], out[2].
 *
 * Word 2 is used as is, word 1 is shifted left by 1 and word 0 by 2; the
 * bits that would fall off (bit 7 of in[7], bits 7 and 6 of in[3]) are saved
 * in t1_32, t2_64 and t2_65 and re-inserted at fixed positions of out[1] and
 * out[2]. For every word, three copies shifted by 0, 1 and 2 are reduced
 * with puckU32ToThree so that each output word collects every third bit; the
 * pieces from word 2, word 1 and word 0 land in bits 31..21, 20..10 and 9..0.
 * NOTE(review): this looks like a 3-way bit-interleaved representation of a
 * 96-bit state row - confirm against the algorithm specification.
 *
 * Uses the caller's locals t1, t2, t9, t1_32, t2_64, t2_65, temp0[3],
 * temp1[3] and temp2[3] as scratch.
 * `in` is read through a u32 pointer cast, so it should be suitably aligned;
 * pass compound expressions in parentheses, e.g. (buf + 12).
 */
#define packU96FormatToThreePacket(out, in) {\
t9 = U32BIG(((u32*)in)[2]); \
t1 = U32BIG(((u32*)in)[1]); \
t2 = U32BIG(((u32*)in)[0]); \
t1_32 = (in[7] & 0x80) >> 7, t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6; \
t1 = t1 << 1; \
t2 = t2 << 2; \
temp0[0] = t9; temp0[1] = t9 << 1; temp0[2] = t9 << 2; \
puckU32ToThree(temp0[0]); \
puckU32ToThree(temp0[1]); \
puckU32ToThree(temp0[2]); \
temp1[0] = t1; temp1[1] = t1 << 1; temp1[2] = t1 << 2; \
puckU32ToThree(temp1[0]); \
puckU32ToThree(temp1[1]); \
puckU32ToThree(temp1[2]); \
temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; \
puckU32ToThree(temp2[0]); \
puckU32ToThree(temp2[1]); \
puckU32ToThree(temp2[2]); \
out[0] = (temp0[0]) | (temp1[0] >> 11) | (temp2[0] >> 22); \
out[1] = (temp0[1]) | (temp1[1] >> 11) | (((u32)t2_64) << 10) | (temp2[1] >> 22); \
out[2] = (temp0[2]) | (((u32)t1_32) << 21) | (temp1[2] >> 11) | (((u32)t2_65) << 10) | (temp2[2] >> 22); \
}
/*
 * Reverse of packU32FormatToThreePacket: rebuilds one 32-bit word from the
 * low 11 bits of in[0], in[1] and in[2] and stores it at `out`.
 *
 * Bit 10 of in[1] and in[2] carries the two top bits of the word (restored
 * to bits 31 and 30); the low 10 bits of each input word are spread back out
 * with unpuckU32ToThree and recombined.
 *
 * Uses the caller's locals t2, t2_64, t2_65 and temp2[3] as scratch.
 * `out` is written through a u32 pointer cast, so it should be suitably
 * aligned.
 * Not referenced by the AEAD functions further down.
 */
#define unpackU32FormatToThreePacket(out, in) {\
temp2[0] = (in[0] & 0x000003ff) << 22; \
t2_64 = ((in[1] & 0x00000400) << 21); \
temp2[1] = (in[1] & 0x000003ff) << 22; \
t2_65 = ((in[2] & 0x00000400) << 20); \
temp2[2] = (in[2] & 0x000003ff) << 22; \
unpuckU32ToThree(temp2[0]); \
unpuckU32ToThree(temp2[1]); \
unpuckU32ToThree(temp2[2]); \
t2 = t2_65 | t2_64 | ((temp2[0] | temp2[1] >> 1 | temp2[2] >> 2) >> 2); \
*(u32*)(out) = U32BIG(t2); \
}
/*
 * Reverse of packU96FormatToThreePacket: rebuilds three 32-bit words from
 * in[0], in[1], in[2] and stores them as 12 bytes at out, out + 4, out + 8.
 *
 * Each input word is split into its three fields (bits 31..21, 20..10 and
 * 9..0, minus the separately stored carry bits at bit 21 of in[2] and bit 10
 * of in[1] / in[2]); the fields are spread back out with unpuckU32ToThree
 * and recombined into word 2 (t9), word 1 (t1) and word 0 (t2).
 *
 * Uses the caller's locals t1, t2, t9, t1_32, t2_64, t2_65, temp0[3],
 * temp1[3] and temp2[3] as scratch.
 * `out` is written through u32 pointer casts, so it should be suitably
 * aligned; pass compound expressions in parentheses.
 */
#define unpackU96FormatToThreePacket( out, in) {\
temp0[0] = in[0] & 0xffe00000; \
temp1[0] = (in[0] & 0x001ffc00) << 11; \
temp2[0] = (in[0] & 0x000003ff) << 22; \
temp0[1] = in[1] & 0xffe00000; \
temp1[1] = (in[1] & 0x001ff800) << 11; \
t2_64 = ((in[1] & 0x00000400) << 21); \
temp2[1] = (in[1] & 0x000003ff) << 22; \
temp0[2] = in[2] & 0xffc00000; \
t1_32 = ((in[2] & 0x00200000) << 10); \
temp1[2] = (in[2] & 0x001ff800) << 11; \
t2_65 = ((in[2] & 0x00000400) << 20); \
temp2[2] = (in[2] & 0x000003ff) << 22; \
unpuckU32ToThree(temp0[0]); \
unpuckU32ToThree(temp0[1]); \
unpuckU32ToThree(temp0[2]); \
t9 = temp0[0] | temp0[1] >> 1 | temp0[2] >> 2; \
unpuckU32ToThree(temp1[0]); \
unpuckU32ToThree(temp1[1]); \
unpuckU32ToThree(temp1[2]); \
t1 = t1_32 | ((temp1[0] | temp1[1] >> 1 | temp1[2] >> 2) >> 1); \
unpuckU32ToThree(temp2[0]); \
unpuckU32ToThree(temp2[1]); \
unpuckU32ToThree(temp2[2]); \
t2 = t2_65 | t2_64 | ((temp2[0] | temp2[1] >> 1 | temp2[2] >> 2) >> 2); \
*(u32*)(out) = U32BIG(t2); \
*(u32*)(out + 4) = U32BIG(t1); \
*(u32*)(out + 8) = U32BIG(t9); \
}
/* Number of elements of a real array (identical repeat of the earlier
   definition). */
#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0])))
/*
 * Bit-sliced S-box applied to 32 columns at once using only bitwise
 * operations.
 *
 * Inputs:  a, b, c, d  (one 32-bit word per state row)
 * Outputs: a is overwritten in place; the other three results are written
 *          to f, g and h.
 *
 * h and g are assigned while b, c and d are still being read, so f, g and h
 * must not name the same objects as b, c or d.
 * Uses the caller's locals t1, t2, t3, t5, t6, t8, t9 and t11 as scratch.
 */
#define sbox(a, b, c, d, f, g, h) \
{ \
t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; a = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \
}
/*
 * Takes three packed words (t0, t1, t2) and writes three packed words
 * (t3, t4, t5): t3 = t2 rotated left by 2, t4 = t0 rotated left by 3,
 * t5 = t1 rotated left by 3.
 * NOTE(review): the name suggests this is a left rotation by 8 of a 96-bit
 * row held in the three-word packed form - confirm against the
 * specification.
 * The outputs must not alias the inputs, because t0 and t1 are read after
 * t3 has been written.
 */
#define U96_BIT_LOTR32_8(t0,t1,t2,t3,t4,t5){\
t3= LOTR32(t2, 2);\
t4 =LOTR32(t0, 3);\
t5 = LOTR32(t1, 3); \
}
/*
 * Takes three packed words (t0, t1, t2) and writes three packed words
 * (t3, t4, t5): t3 = t1 rotated left by 18, t4 = t2 rotated left by 18,
 * t5 = t0 rotated left by 19.
 * NOTE(review): the name suggests this is a left rotation by 55 of a 96-bit
 * row held in the three-word packed form - confirm against the
 * specification.
 * The outputs must not alias the inputs, because t2 and t0 are read after
 * t3 has been written.
 */
#define U96_BIT_LOTR32_55(t0,t1,t2,t3,t4,t5){\
t3= LOTR32(t1, 18); \
t4 = LOTR32(t2, 18);\
t5 = LOTR32(t0, 19); \
}
/*
s0 s1 s2
s3 s4 s5
s6 s7 s8
s9 s10 s11
*/
/*
 * Encrypts mlen bytes of m and authenticates them together with adlen bytes
 * of ad, using key k and nonce npub.
 * Writes the ciphertext followed by a CRYPTO_ABYTES-byte tag to c and stores
 * mlen + CRYPTO_ABYTES in *clen. nsec is not read by the implementation
 * below. Returns 0.
 */
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
);
/*
 * Decrypts clen bytes of c (ciphertext followed by the tag) into m and
 * verifies the tag over the message and adlen bytes of ad, using key k and
 * nonce npub.
 * Stores the message length (clen - CRYPTO_ABYTES) in *mlen. nsec is not
 * read by the implementation below.
 * Returns 0 when the tag matches, -1 when clen is shorter than
 * CRYPTO_ABYTES or the tag does not match.
 */
int crypto_aead_decrypt(
unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
);
#include"auxFormat.h"
/* Bytes absorbed or produced per block: 192 bits = 24 bytes, i.e. the first
   two packed rows s[0..5] of the state. */
#define aead_RATE (192 / 8)
/* Number of ROUND384 iterations run after loading nonce and key. */
#define PR0_ROUNDS 76
/* Number of ROUND384 iterations run after each associated-data or message
   block. */
#define PR_ROUNDS 28
/* Number of ROUND384 iterations run before the tag is extracted. */
#define PRF_ROUNDS 32
/*
 * Round constants, indexed by round number (ROUND384 reads entry lunNum).
 * Each byte is stored pre-split for the packed state: bits 7..6 go into
 * s[0], bits 5..3 into s[1] and bits 2..0 into s[2].
 * Only the first PR0_ROUNDS (76) entries are reached by the code below.
 * NOTE(review): the values are presumably a 7-bit constant sequence
 * rearranged into this split layout - confirm against the specification.
 */
unsigned char constant7Format[127] = {
/*constant7Format[127]:*/
0x01,0x08,0x40,0x02,0x10,0x80,0x05,0x09,0x48,0x42,0x12,0x90,
0x85,0x0c,0x41,0x0a,0x50,0x82,0x15,0x89,0x4d,0x4b,0x5a,0xd2,
0x97,0x9c,0xc4,0x06,0x11,0x88,0x45,0x0b,0x58,0xc2,0x17,0x99,
0xcd,0x4e,0x53,0x9a,0xd5,0x8e,0x54,0x83,0x1d,0xc9,0x4f,0x5b,
0xda,0xd7,0x9e,0xd4,0x86,0x14,0x81,0x0d,0x49,0x4a,0x52,0x92,
0x95,0x8c,0x44,0x03,0x18,0xc0,0x07,0x19,0xc8,0x47,0x1b,0xd8,
0xc7,0x1e,0xd1,0x8f,0x5c,0xc3,0x1f,0xd9,0xcf,0x5e,0xd3,0x9f,
0xdc,0xc6,0x16,0x91,0x8d,0x4c,0x43,0x1a,0xd0,0x87,0x1c,0xc1,
0x0f,0x59,0xca,0x57,0x9b,0xdd,0xce,0x56,0x93,0x9d,0xcc,0x46,
0x13,0x98,0xc5,0x0e,0x51,0x8a,0x55,0x8b,0x5d,0xcb,0x5f,0xdb,
0xdf,0xde,0xd6,0x96,0x94,0x84,0x04, };
/*
 * One round of the permutation on the 12-word packed state s[0..11]
 * (four rows of three words, see the layout comment above).
 *
 * lunNum - round index, selects the round constant constant7Format[lunNum];
 *          it is expanded three times, so it must have no side effects.
 *
 * Steps:
 *  1. XOR the three fields of the round constant into row 0 (s[0..2]).
 *  2. Apply the S-box to each of the three word columns. Row 0 is updated in
 *     place; the new rows 1..3 go to s_temp, except that the row-1 outputs
 *     of columns 1 and 2 are written straight to s[3] and s[4].
 *  3. Finish row 1 by storing the column-0 output rotated by 1 in s[5], then
 *     rebuild rows 2 and 3 from s_temp with U96_BIT_LOTR32_8 and
 *     U96_BIT_LOTR32_55.
 *
 * Requires the caller's locals s[12], s_temp[12] and the sbox scratch
 * variables t1, t2, t3, t5, t6, t8, t9, t11.
 */
#define ROUND384(lunNum) {\
s[0] ^= (constant7Format[lunNum] >> 6) & 0x3;\
s[1] ^= (constant7Format[lunNum] >> 3) & 0x7;\
s[2] ^= constant7Format[lunNum] & 0x7;\
sbox(s[0], s[3], s[6], s[9] , s_temp[3], s_temp[6], s_temp[9]);\
sbox(s[1], s[4], s[7], s[10], s[3] , s_temp[7], s_temp[10]);\
sbox(s[2], s[5], s[8], s[11], s[4] , s_temp[8], s_temp[11]);\
s[5] = LOTR32(s_temp[3], 1); \
U96_BIT_LOTR32_8(s_temp[6], s_temp [7], s_temp[ 8], s[6], s[7], s[8]);\
U96_BIT_LOTR32_55(s_temp[9], s_temp[10], s_temp[11], s[9], s[10], s[11]);\
}
/*
 * AEAD encryption on a 12-word (384-bit) packed state with a 24-byte rate.
 *
 * c     - receives the ciphertext (mlen bytes) followed by a 16-byte tag
 * clen  - receives mlen + CRYPTO_ABYTES
 * m     - message, mlen bytes
 * ad    - associated data, adlen bytes
 * nsec  - not read
 * npub  - 16-byte nonce
 * k     - 16-byte key
 *
 * Always returns 0.
 */
int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen,
	const unsigned char *m, unsigned long long mlen,
	const unsigned char *ad, unsigned long long adlen,
	const unsigned char *nsec, const unsigned char *npub,
	const unsigned char *k) {
	/* Names required by the pack/unpack, sbox and round macros. */
	u32 s[12] = { 0 };
	u32 s_temp[12] = { 0 };
	u32 t1, t2, t3, t5, t6, t8, t9, t11;
	u32 t1_32, t2_64, t2_65;
	u32 temp0[3] = { 0 };
	u32 temp1[3] = { 0 };
	u32 temp2[3] = { 0 };
	/* Working storage of this function. */
	u8 block[24] = { 0 };   /* one rate-sized byte block */
	u32 packed[6] = { 0 };  /* the same block in packed form */
	u8 rnd;                 /* round counter */
	u8 w;                   /* word index into the rate part */

	*clen = mlen + CRYPTO_ABYTES;

	/*
	 * Initial state:
	 *   row 0 = npub[0..11]
	 *   row 1 = npub[12..15] || k[0..7]
	 *   row 2 = k[8..15] || four zero bytes
	 *   row 3 = constant with only the top bit of its first word set
	 */
	memcpy(block, npub + 12, 4);
	memcpy(block + 4, k, 16);
	packU96FormatToThreePacket(s, npub);
	packU96FormatToThreePacket((s + 3), block);
	packU96FormatToThreePacket((s + 6), (block + 12));
	s[9] = 0x80000000;
	for (rnd = 0; rnd < PR0_ROUNDS; rnd++) {
		ROUND384(rnd);
	}

	/* Associated data: skipped entirely when there is none. */
	if (adlen != 0) {
		/* full blocks */
		for (; adlen >= aead_RATE; adlen -= aead_RATE, ad += aead_RATE) {
			packU96FormatToThreePacket(packed, ad);
			packU96FormatToThreePacket((packed + 3), (ad + 12));
			for (w = 0; w < 6; w++) {
				s[w] ^= packed[w];
			}
			for (rnd = 0; rnd < PR_ROUNDS; rnd++) {
				ROUND384(rnd);
			}
		}
		/* last block: remaining bytes, then 0x01, then zeros */
		memset(block, 0, sizeof(block));
		memcpy(block, ad, adlen);
		block[adlen] = 0x01;
		packU96FormatToThreePacket(packed, block);
		packU96FormatToThreePacket((packed + 3), (block + 12));
		for (w = 0; w < 6; w++) {
			s[w] ^= packed[w];
		}
		for (rnd = 0; rnd < PR_ROUNDS; rnd++) {
			ROUND384(rnd);
		}
	}

	/* Separates the associated-data phase from the message phase. */
	s[9] ^= 0x80000000;

	/* Message: skipped entirely when it is empty. */
	if (mlen != 0) {
		/* full blocks: absorb the plaintext, emit the rate part as ciphertext */
		for (; mlen >= aead_RATE;
				mlen -= aead_RATE, m += aead_RATE, c += aead_RATE) {
			packU96FormatToThreePacket(packed, m);
			packU96FormatToThreePacket((packed + 3), (m + 12));
			for (w = 0; w < 6; w++) {
				s[w] ^= packed[w];
			}
			unpackU96FormatToThreePacket(c, s);
			unpackU96FormatToThreePacket((c + 12), (s + 3));
			for (rnd = 0; rnd < PR_ROUNDS; rnd++) {
				ROUND384(rnd);
			}
		}
		/* last block: padded like the associated data, no permutation after it */
		memset(block, 0, sizeof(block));
		memcpy(block, m, mlen);
		block[mlen] = 0x01;
		packU96FormatToThreePacket(packed, block);
		packU96FormatToThreePacket((packed + 3), (block + 12));
		for (w = 0; w < 6; w++) {
			s[w] ^= packed[w];
		}
		/* only the first mlen bytes of the rate part are ciphertext */
		unpackU96FormatToThreePacket(block, s);
		unpackU96FormatToThreePacket((block + 12), (s + 3));
		memcpy(c, block, mlen);
		c += mlen;
	}

	/* Finalization. */
	for (rnd = 0; rnd < PRF_ROUNDS; rnd++) {
		ROUND384(rnd);
	}

	/* Tag: all 12 bytes of row 0 followed by the first 4 bytes of row 1. */
	unpackU96FormatToThreePacket(c, s);
	unpackU96FormatToThreePacket(block, (s + 3));
	memcpy(c + 12, block, 4);
	return 0;
}
int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen,
unsigned char *nsec, const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub, const unsigned char *k) {
u8 i, j;
u32 s[12] = { 0 };
u32 s_temp[12] = { 0 };
u32 dataFormat[12] = { 0 };
u32 dataFormat_1[12] = { 0 };
u8 tempData[24] = { 0 };
u8 tempU8[24] = { 0 };
u32 t1, t2, t3, t5, t6, t8, t9, t11;
u32 t1_32, t2_64, t2_65;
u32 temp0[3] = { 0 };
u32 temp1[3] = { 0 };
u32 temp2[3] = { 0 }; *mlen = clen - CRYPTO_ABYTES;
if (clen < CRYPTO_ABYTES)
return -1;
// initialization
packU96FormatToThreePacket(s, npub);
memcpy(tempData, npub + 12, sizeof(unsigned char) * 4);
memcpy(tempData + 4, k, sizeof(unsigned char) * 16);
packU96FormatToThreePacket((s + 3), tempData);
packU96FormatToThreePacket((s + 6), (tempData + 12));
s[9] = 0x80000000;
for (i = 0; i < PR0_ROUNDS; i++) {
ROUND384(i);
}
// process associated data
if (adlen) {
while (adlen >= aead_RATE) {
packU96FormatToThreePacket(dataFormat, ad);
s[0] ^= dataFormat[0];
s[1] ^= dataFormat[1];
s[2] ^= dataFormat[2];
packU96FormatToThreePacket((dataFormat + 3), (ad + 12));
s[3] ^= dataFormat[3];
s[4] ^= dataFormat[4];
s[5] ^= dataFormat[5];
for (i = 0; i < PR_ROUNDS; i++) {
ROUND384(i);
}
adlen -= aead_RATE;
ad += aead_RATE;
}
memset(tempData, 0, sizeof(tempData));
memcpy(tempData, ad, adlen * sizeof(unsigned char));
tempData[adlen] = 0x01;
packU96FormatToThreePacket(dataFormat, tempData);
s[0] ^= dataFormat[0];
s[1] ^= dataFormat[1];
s[2] ^= dataFormat[2];
packU96FormatToThreePacket((dataFormat + 3), (tempData + 12));
s[3] ^= dataFormat[3];
s[4] ^= dataFormat[4];
s[5] ^= dataFormat[5];
for (i = 0; i < PR_ROUNDS; i++) {
ROUND384(i);
}
}
s[9] ^= 0x80000000;
///////////
clen -= CRYPTO_ABYTES;
if (clen) {
while (clen >= aead_RATE) {
packU96FormatToThreePacket(dataFormat, c);
dataFormat_1[0] = s[0] ^ dataFormat[0];
dataFormat_1[1] = s[1] ^ dataFormat[1];
dataFormat_1[2] = s[2] ^ dataFormat[2];
packU96FormatToThreePacket((dataFormat+3), (c+12));
dataFormat_1[3] = s[3] ^ dataFormat[3];
dataFormat_1[4] = s[4] ^ dataFormat[4];
dataFormat_1[5] = s[5] ^ dataFormat[5];
unpackU96FormatToThreePacket(m, dataFormat_1);
unpackU96FormatToThreePacket((m + 12), (dataFormat_1 + 3));
s[0] = dataFormat[0];
s[1] = dataFormat[1];
s[2] = dataFormat[2];
s[3] = dataFormat[3];
s[4] = dataFormat[4];
s[5] = dataFormat[5];
for (i = 0; i < PR_ROUNDS; i++) {
ROUND384(i);
}
clen -= aead_RATE;
m += aead_RATE;
c += aead_RATE;
}
unpackU96FormatToThreePacket(tempU8, s);
unpackU96FormatToThreePacket((tempU8+12), (s+3));
for (i = 0; i < clen; ++i, ++m, ++c)
{
*m = tempU8[i] ^ *c;
tempU8[i] = *c;
}
tempU8[i] ^= 0x01;
packU96FormatToThreePacket(s, tempU8);
packU96FormatToThreePacket((s + 3), (tempU8 + 12));
}
// finalization
for (i = 0; i < PRF_ROUNDS; i++) {
ROUND384(i);
}
// return tag
unpackU96FormatToThreePacket(tempU8, s);
unpackU96FormatToThreePacket((tempU8 + 12), (s + 3));
if (memcmp((void*)tempU8, (void*)c, CRYPTO_ABYTES)) {
*mlen = 0;
memset(m, 0, sizeof(unsigned char) * (clen - CRYPTO_ABYTES));
return -1;
}
return 0;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment