Commit 6a3941ac by Enrico Pozzobon

Merge branch 'email-submissions'

parents 526bda82 2e4a168b
// Size parameters of this AEAD variant, in bytes.
// Key length: ascon_core() reads 16 bytes from k and the IV encodes
// 8 * CRYPTO_KEYBYTES.
#define CRYPTO_KEYBYTES 16
// No secret message number: the nsec arguments are ignored.
#define CRYPTO_NSECBYTES 0
// Public nonce length: ascon_core() reads 16 bytes from npub.
#define CRYPTO_NPUBBYTES 16
// Tag length appended to the ciphertext (clen = mlen + CRYPTO_ABYTES).
#define CRYPTO_ABYTES 16
// NOTE(review): not referenced by the code visible here; presumably tells the
// test/benchmark harness that input and output buffers must not overlap.
#define CRYPTO_NOOVERLAP 1
#include "core.h"
/*
 * Absorbs `len` bytes from `in` into the rate words x0/x1 of the
 * bit-interleaved state, RATE (16) bytes per block.
 *
 *   ASCON_AD  - input is only XORed into the state; `out` is never
 *               dereferenced or advanced (ascon_core passes a null pointer).
 *   ASCON_ENC - additionally writes state ^ input to `out`.
 *   ASCON_DEC - writes state ^ input to `out`, then replaces the processed
 *               rate bytes with the input bytes.
 *
 * P(s, PB_ROUNDS) is applied after every full block. The remaining
 * 0..RATE-1 bytes are padded with one 0x80 byte and absorbed without a
 * permutation call afterwards.
 *
 * All buffer accesses are byte-wise (first byte = most significant byte of
 * the word), so `in`/`out` need no alignment and are never accessed through
 * u64 pointers. Each block is read completely before its output is written.
 */
void process_data(state* s, unsigned char* out, const unsigned char* in,
                  unsigned long long len, u8 mode) {
  u32_2 t0, t1;
  u64 in0, in1;   /* current input block as two big-endian words */
  u64 tmp0, tmp1; /* rate words converted back from bit-interleaved form */
  u64 i;

  while (len >= RATE) {
    in0 = 0;
    in1 = 0;
    for (i = 0; i < 8; ++i) {
      in0 |= INS_BYTE64(in[i], i);
      in1 |= INS_BYTE64(in[i + 8], i);
    }
    t0 = to_bit_interleaving(in0);
    s->x0.e ^= t0.e;
    s->x0.o ^= t0.o;
    t1 = to_bit_interleaving(in1);
    s->x1.e ^= t1.e;
    s->x1.o ^= t1.o;
    if (mode != ASCON_AD) {
      tmp0 = from_bit_interleaving(s->x0);
      tmp1 = from_bit_interleaving(s->x1);
      for (i = 0; i < 8; ++i) {
        out[i] = EXT_BYTE64(tmp0, i);
        out[i + 8] = EXT_BYTE64(tmp1, i);
      }
      /* only advance a real output pointer (it is null in AD mode) */
      out += RATE;
    }
    if (mode == ASCON_DEC) {
      /* the rate becomes the ciphertext block */
      s->x0 = t0;
      s->x1 = t1;
    }
    P(s, PB_ROUNDS);
    in += RATE;
    len -= RATE;
  }

  /* final (possibly empty) block: remaining bytes followed by 0x80 */
  in0 = 0;
  in1 = 0;
  for (i = 0; i < len; ++i) {
    if (i < 8)
      in0 ^= INS_BYTE64(in[i], i);
    else
      in1 ^= INS_BYTE64(in[i], i % 8);
  }
  if (len < 8)
    in0 ^= INS_BYTE64(0x80, len);
  else
    in1 ^= INS_BYTE64(0x80, len % 8);
  t0 = to_bit_interleaving(in0);
  s->x0.e ^= t0.e;
  s->x0.o ^= t0.o;
  t1 = to_bit_interleaving(in1);
  s->x1.e ^= t1.e;
  s->x1.o ^= t1.o;

  if (mode != ASCON_AD) {
    tmp0 = from_bit_interleaving(s->x0);
    tmp1 = from_bit_interleaving(s->x1);
    for (i = 0; i < len; ++i) {
      if (i < 8)
        out[i] = EXT_BYTE64(tmp0, i);
      else
        out[i] = EXT_BYTE64(tmp1, i % 8);
    }
    if (mode == ASCON_DEC) {
      /* overwrite the first len rate bytes with the input bytes, taken from
         the words loaded above rather than re-read from `in` */
      for (i = 0; i < len; ++i) {
        const u64 mask = INS_BYTE64(0xff, i % 8);
        if (i < 8)
          tmp0 = (tmp0 & ~mask) | (in0 & mask);
        else
          tmp1 = (tmp1 & ~mask) | (in1 & mask);
      }
      s->x0 = to_bit_interleaving(tmp0);
      s->x1 = to_bit_interleaving(tmp1);
    }
  }
}
/*
 * Runs the complete AEAD state machine on the bit-interleaved state *s:
 * initialization with key and nonce, absorption of the associated data,
 * processing of `tlen` bytes from `in` to `out` according to `mode`
 * (ASCON_ENC or ASCON_DEC), and finalization.
 *
 * On return s->x3 and s->x4 hold the two tag words (callers read the tag
 * from there). `k` and `npub` must each point to 16 readable bytes.
 *
 * Key and nonce are loaded byte by byte (first byte most significant), so
 * the buffers need no alignment and are not accessed through u64 pointers.
 */
void ascon_core(state* s, unsigned char* out, const unsigned char* in,
                unsigned long long tlen, const unsigned char* ad,
                unsigned long long adlen, const unsigned char* npub,
                const unsigned char* k, u8 mode) {
  u32_2 K0, K1, N0, N1;
  u64 k0 = 0, k1 = 0, n0 = 0, n1 = 0;
  u64 i;

  // load key and nonce
  for (i = 0; i < 8; ++i) {
    k0 |= INS_BYTE64(k[i], i);
    k1 |= INS_BYTE64(k[i + 8], i);
    n0 |= INS_BYTE64(npub[i], i);
    n1 |= INS_BYTE64(npub[i + 8], i);
  }
  K0 = to_bit_interleaving(k0);
  K1 = to_bit_interleaving(k1);
  N0 = to_bit_interleaving(n0);
  N1 = to_bit_interleaving(n1);

  // initialization
  s->x0 = to_bit_interleaving(IV);
  s->x1.e = K0.e;
  s->x1.o = K0.o;
  s->x2.e = K1.e;
  s->x2.o = K1.o;
  s->x3.e = N0.e;
  s->x3.o = N0.o;
  s->x4.e = N1.e;
  s->x4.o = N1.o;
  P(s, PA_ROUNDS);
  s->x3.e ^= K0.e;
  s->x3.o ^= K0.o;
  s->x4.e ^= K1.e;
  s->x4.o ^= K1.o;

  // process associated data (skipped entirely when there is none)
  if (adlen) {
    process_data(s, (void*)0, ad, adlen, ASCON_AD);
    P(s, PB_ROUNDS);
  }
  // flip the lowest bit of x4 (bit 0 of the even half) before the payload
  s->x4.e ^= 1;

  // process plaintext/ciphertext
  process_data(s, out, in, tlen, mode);

  // finalization
  s->x2.e ^= K0.e;
  s->x2.o ^= K0.o;
  s->x3.e ^= K1.e;
  s->x3.o ^= K1.o;
  P(s, PA_ROUNDS);
  s->x3.e ^= K0.e;
  s->x3.o ^= K0.o;
  s->x4.e ^= K1.e;
  s->x4.o ^= K1.o;
}
// Shared declarations for the AEAD core: mode selectors, variant parameters
// and the two functions implemented in the core source file.
#ifndef CORE_H_
#define CORE_H_
#include "api.h"
#include "endian.h"
#include "permutations.h"
// Values of the `mode` argument of process_data() and ascon_core().
#define ASCON_AD 0
#define ASCON_ENC 1
#define ASCON_DEC 2
// Number of bytes absorbed per block in process_data().
#define RATE (128 / 8)
// Round counts passed to P(): PA_ROUNDS around initialization/finalization,
// PB_ROUNDS after each data block.
#define PA_ROUNDS 12
#define PB_ROUNDS 8
// Initial value of state word x0: key size in bits, rate in bits, PA_ROUNDS
// and PB_ROUNDS, one byte each in the upper 32 bits; the lower 32 bits are 0.
#define IV \
((u64)(8 * (CRYPTO_KEYBYTES)) << 56 | (u64)(8 * (RATE)) << 48 | \
(u64)(PA_ROUNDS) << 40 | (u64)(PB_ROUNDS) << 32)
// Absorbs `len` bytes of `in` into the state; for modes other than ASCON_AD
// it also writes `len` bytes to `out`.
void process_data(state* s, unsigned char* out, const unsigned char* in,
unsigned long long len, u8 mode);
// Runs initialization, associated-data and payload processing, and
// finalization; the tag words are left in s->x3 and s->x4.
void ascon_core(state* s, unsigned char* out, const unsigned char* in,
unsigned long long tlen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k, u8 mode);
#endif // CORE_H_
#include "core.h"
int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen,
unsigned char* nsec, const unsigned char* c,
unsigned long long clen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k) {
if (clen < CRYPTO_ABYTES) {
*mlen = 0;
return -1;
}
state s;
u32_2 t0, t1;
(void)nsec;
// set plaintext size
*mlen = clen - CRYPTO_ABYTES;
ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DEC);
// verify tag (should be constant time, check compiler output)
t0 = to_bit_interleaving(U64BIG(*(u64*)(c + *mlen)));
t1 = to_bit_interleaving(U64BIG(*(u64*)(c + *mlen + 8)));
if (((s.x3.e ^ t0.e) | (s.x3.o ^ t0.o) | (s.x4.e ^ t1.e) | (s.x4.o ^ t1.o)) !=
0) {
*mlen = 0;
return -1;
}
return 0;
}
#include "core.h"
/*
 * Encrypts `mlen` bytes from `m` into `c` and appends the CRYPTO_ABYTES tag,
 * so `c` must have room for mlen + CRYPTO_ABYTES bytes; that total is stored
 * in *clen. Always returns 0. `nsec` is unused.
 *
 * The tag is written byte by byte (most significant byte first), so
 * `c + mlen` needs no alignment and is not accessed through a u64 pointer.
 */
int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen,
                        const unsigned char* m, unsigned long long mlen,
                        const unsigned char* ad, unsigned long long adlen,
                        const unsigned char* nsec, const unsigned char* npub,
                        const unsigned char* k) {
  state s;
  u64 tmp0, tmp1;
  unsigned char* tag;
  u64 i;
  (void)nsec;

  // set ciphertext size
  *clen = mlen + CRYPTO_ABYTES;
  ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENC);

  // set tag
  tmp0 = from_bit_interleaving(s.x3);
  tmp1 = from_bit_interleaving(s.x4);
  tag = c + mlen;
  for (i = 0; i < 8; ++i) {
    tag[i] = EXT_BYTE64(tmp0, i);
    tag[i + 8] = EXT_BYTE64(tmp1, i);
  }
  return 0;
}
// Byte-order helpers: UxxBIG(x) converts a value between host byte order and
// big-endian order (the conversion is its own inverse).
#ifndef ENDIAN_H_
#define ENDIAN_H_
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
// macros for big endian machines: nothing to do
#define U64BIG(x) (x)
#define U32BIG(x) (x)
#define U16BIG(x) (x)
#elif defined(_MSC_VER) || \
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
// macros for little endian machines (any _MSC_VER build is treated as little
// endian): reverse the byte order. Each macro evaluates x several times, so
// the argument must be free of side effects.
#define U64BIG(x) \
((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) | \
(((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) | \
(((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) | \
(((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56))
// U32BIG and U16BIG are not used by the code visible here.
#define U32BIG(x) \
((((x)&0x000000FF) << 24) | (((x)&0x0000FF00) << 8) | \
(((x)&0x00FF0000) >> 8) | (((x)&0xFF000000) >> 24))
#define U16BIG(x) ((((x)&0x00FF) << 8) | (((x)&0xFF00) >> 8))
#else
// Neither branch matched: fail the build instead of guessing the byte order.
#error "ascon byte order macros not defined in endian.h"
#endif
#endif // ENDIAN_H_
#include "permutations.h"
// Round constants for the 12 rounds, one {C_e, C_o} pair per round. P() walks
// this table from index START_ROUND(rounds) to 11 and ROUND XORs the first
// element into x2.e and the second into x2.o.
static const u8 constants[][2] = {
{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9}, {0x6, 0xc}, {0x3, 0xc},
{0x6, 0x9}, {0x3, 0x9}, {0xc, 0x6}, {0x9, 0x6}, {0xc, 0x3}, {0x9, 0x3}};
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
/*
 * Splits a 64-bit word into its bit-interleaved form: the even-indexed bits
 * (bit 0, 2, ...) are gathered in .e and the odd-indexed bits in .o.
 * Each 32-bit half is unshuffled with four masked swap steps so that its
 * even bits land in the low 16 bits and its odd bits in the high 16 bits;
 * the halves are then recombined.
 */
u32_2 to_bit_interleaving(u64 in) {
  u32 upper = (in) >> 32;
  u32 lower = (u32)(in);
  u32 swap_lo, swap_hi;
  u32_2 result;

  /* unshuffle the low word */
  swap_lo = (lower ^ (lower >> 1)) & 0x22222222;
  lower ^= swap_lo ^ (swap_lo << 1);
  swap_lo = (lower ^ (lower >> 2)) & 0x0C0C0C0C;
  lower ^= swap_lo ^ (swap_lo << 2);
  swap_lo = (lower ^ (lower >> 4)) & 0x00F000F0;
  lower ^= swap_lo ^ (swap_lo << 4);
  swap_lo = (lower ^ (lower >> 8)) & 0x0000FF00;
  lower ^= swap_lo ^ (swap_lo << 8);

  /* unshuffle the high word the same way */
  swap_hi = (upper ^ (upper >> 1)) & 0x22222222;
  upper ^= swap_hi ^ (swap_hi << 1);
  swap_hi = (upper ^ (upper >> 2)) & 0x0C0C0C0C;
  upper ^= swap_hi ^ (swap_hi << 2);
  swap_hi = (upper ^ (upper >> 4)) & 0x00F000F0;
  upper ^= swap_hi ^ (swap_hi << 4);
  swap_hi = (upper ^ (upper >> 8)) & 0x0000FF00;
  upper ^= swap_hi ^ (swap_hi << 8);

  /* low 16 bits of each word -> .e, high 16 bits -> .o */
  result.e = (lower & 0x0000FFFF) | (upper << 16);
  result.o = (lower >> 16) | (upper & 0xFFFF0000);
  return result;
}
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
/*
 * Inverse of to_bit_interleaving(): rebuilds the 64-bit word whose
 * even-indexed bits are in.e and whose odd-indexed bits are in.o.
 * The 16-bit halves are regrouped per 32-bit word first, then each word is
 * shuffled with the four masked swap steps in the reverse order.
 */
u64 from_bit_interleaving(u32_2 in) {
  u32 lower = (in.e & 0x0000FFFF) | (in.o << 16);
  u32 upper = (in.e >> 16) | (in.o & 0xFFFF0000);
  u32 swap_lo, swap_hi;
  u64 result;

  /* shuffle the low word */
  swap_lo = (lower ^ (lower >> 8)) & 0x0000FF00;
  lower ^= swap_lo ^ (swap_lo << 8);
  swap_lo = (lower ^ (lower >> 4)) & 0x00F000F0;
  lower ^= swap_lo ^ (swap_lo << 4);
  swap_lo = (lower ^ (lower >> 2)) & 0x0C0C0C0C;
  lower ^= swap_lo ^ (swap_lo << 2);
  swap_lo = (lower ^ (lower >> 1)) & 0x22222222;
  lower ^= swap_lo ^ (swap_lo << 1);

  /* shuffle the high word the same way */
  swap_hi = (upper ^ (upper >> 8)) & 0x0000FF00;
  upper ^= swap_hi ^ (swap_hi << 8);
  swap_hi = (upper ^ (upper >> 4)) & 0x00F000F0;
  upper ^= swap_hi ^ (swap_hi << 4);
  swap_hi = (upper ^ (upper >> 2)) & 0x0C0C0C0C;
  upper ^= swap_hi ^ (swap_hi << 2);
  swap_hi = (upper ^ (upper >> 1)) & 0x22222222;
  upper ^= swap_hi ^ (swap_hi << 1);

  result = (u64)upper << 32 | lower;
  return result;
}
/*
 * Applies the last `rounds` of the 12 permutation rounds to *p.
 * The state is copied into the local `s` (the name ROUND operates on, along
 * with the temporaries t0..t4), ROUND runs with the constants at indices
 * START_ROUND(rounds)..11, and the result is copied back.
 */
void P(state *p, u8 rounds) {
  state s = *p;
  u32_2 t0, t1, t2, t3, t4;
  u32 idx = START_ROUND(rounds);

  while (idx < 12) {
    ROUND(constants[idx][0], constants[idx][1]);
    idx++;
  }
  *p = s;
}
// Types, byte helpers and the round macro of the bit-interleaved permutation.
#ifndef PERMUTATIONS_H_
#define PERMUTATIONS_H_
typedef unsigned char u8;
// NOTE(review): ROTR32 and the interleaving code rely on unsigned int being
// exactly 32 bits wide.
typedef unsigned int u32;
typedef unsigned long long u64;
// A 64-bit word in bit-interleaved form: e holds the even-indexed bits,
// o the odd-indexed bits (see to_bit_interleaving()).
typedef struct {
u32 e;
u32 o;
} u32_2;
// Permutation state: five bit-interleaved 64-bit words.
typedef struct {
u32_2 x0;
u32_2 x1;
u32_2 x2;
u32_2 x3;
u32_2 x4;
} state;
// Byte n of the 64-bit value x, where n = 0 is the most significant byte.
#define EXT_BYTE64(x, n) ((u8)((u64)(x) >> (8 * (7 - (n)))))
// x moved to byte position n of a 64-bit value (n = 0 is the most
// significant byte).
#define INS_BYTE64(x, n) ((u64)(x) << (8 * (7 - (n))))
// 32-bit right rotation; n must be in 1..31 (a shift by 32 is undefined).
#define ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
// Index of the first entry of the round-constant table when x rounds are run.
#define START_ROUND(x) (12 - (x))
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
u32_2 to_bit_interleaving(u64 in);
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
u64 from_bit_interleaving(u32_2 in);
/* clang-format off */
/*
 * One permutation round. Operates on a variable `s` of type state and uses
 * the u32_2 temporaries t0..t4; all of them must be declared in the
 * enclosing scope. C_e and C_o are XORed into x2.e and x2.o.
 * In the linear layer every 64-bit rotation is expressed as 32-bit rotations
 * of the even and odd halves; x2 is complemented as the last step.
 */
#define ROUND(C_e, C_o) \
do { \
/* round constant */ \
s.x2.e ^= C_e; s.x2.o ^= C_o; \
/* s-box layer */ \
s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \
s.x4.e ^= s.x3.e; s.x4.o ^= s.x3.o; \
s.x2.e ^= s.x1.e; s.x2.o ^= s.x1.o; \
t0.e = s.x0.e; t0.o = s.x0.o; \
t4.e = s.x4.e; t4.o = s.x4.o; \
t3.e = s.x3.e; t3.o = s.x3.o; \
t1.e = s.x1.e; t1.o = s.x1.o; \
t2.e = s.x2.e; t2.o = s.x2.o; \
s.x0.e = t0.e ^ (~t1.e & t2.e); s.x0.o = t0.o ^ (~t1.o & t2.o); \
s.x2.e = t2.e ^ (~t3.e & t4.e); s.x2.o = t2.o ^ (~t3.o & t4.o); \
s.x4.e = t4.e ^ (~t0.e & t1.e); s.x4.o = t4.o ^ (~t0.o & t1.o); \
s.x1.e = t1.e ^ (~t2.e & t3.e); s.x1.o = t1.o ^ (~t2.o & t3.o); \
s.x3.e = t3.e ^ (~t4.e & t0.e); s.x3.o = t3.o ^ (~t4.o & t0.o); \
s.x1.e ^= s.x0.e; s.x1.o ^= s.x0.o; \
s.x3.e ^= s.x2.e; s.x3.o ^= s.x2.o; \
s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \
/* linear layer */ \
t0.e = s.x0.e ^ ROTR32(s.x0.o, 4); t0.o = s.x0.o ^ ROTR32(s.x0.e, 5); \
t1.e = s.x1.e ^ ROTR32(s.x1.e, 11); t1.o = s.x1.o ^ ROTR32(s.x1.o, 11); \
t2.e = s.x2.e ^ ROTR32(s.x2.o, 2); t2.o = s.x2.o ^ ROTR32(s.x2.e, 3); \
t3.e = s.x3.e ^ ROTR32(s.x3.o, 3); t3.o = s.x3.o ^ ROTR32(s.x3.e, 4); \
t4.e = s.x4.e ^ ROTR32(s.x4.e, 17); t4.o = s.x4.o ^ ROTR32(s.x4.o, 17); \
s.x0.e ^= ROTR32(t0.o, 9); s.x0.o ^= ROTR32(t0.e, 10); \
s.x1.e ^= ROTR32(t1.o, 19); s.x1.o ^= ROTR32(t1.e, 20); \
s.x2.e ^= t2.o; s.x2.o ^= ROTR32(t2.e, 1); \
s.x3.e ^= ROTR32(t3.e, 5); s.x3.o ^= ROTR32(t3.o, 5); \
s.x4.e ^= ROTR32(t4.o, 3); s.x4.o ^= ROTR32(t4.e, 4); \
s.x2.e = ~s.x2.e; s.x2.o = ~s.x2.o; \
} while(0)
/* clang-format on */
// Applies the last `rounds` of the 12 permutation rounds to *p.
void P(state *p, u8 rounds);
#endif // PERMUTATIONS_H_
// Size parameters of this AEAD variant, in bytes.
// Key length: ascon_core() reads 16 bytes from k and the IV encodes
// 8 * CRYPTO_KEYBYTES.
#define CRYPTO_KEYBYTES 16
// No secret message number: the nsec arguments are ignored.
#define CRYPTO_NSECBYTES 0
// Public nonce length: ascon_core() reads 16 bytes from npub.
#define CRYPTO_NPUBBYTES 16
// Tag length appended to the ciphertext (clen = mlen + CRYPTO_ABYTES).
#define CRYPTO_ABYTES 16
// NOTE(review): not referenced by the code visible here; presumably tells the
// test/benchmark harness that input and output buffers must not overlap.
#define CRYPTO_NOOVERLAP 1
#include "core.h"
/*
 * Absorbs `len` bytes from `in` into the rate words x0/x1 of the state,
 * RATE (16) bytes per block.
 *
 *   ASCON_AD  - input is only XORed into the state; `out` is never
 *               dereferenced or advanced (ascon_core passes a null pointer).
 *   ASCON_ENC - additionally writes state ^ input to `out`.
 *   ASCON_DEC - writes state ^ input to `out`, then replaces the processed
 *               rate bytes with the input bytes.
 *
 * P(s, PB_ROUNDS) is applied after every full block. The remaining
 * 0..RATE-1 bytes are absorbed, followed by a single 0x80 padding byte,
 * without a permutation call afterwards.
 *
 * All buffer accesses are byte-wise (first byte = most significant byte of
 * the word), so `in`/`out` need no alignment and are never accessed through
 * u64 pointers. Input is always read before the matching output is written
 * and is not read a second time.
 */
void process_data(state* s, unsigned char* out, const unsigned char* in,
                  unsigned long long len, u8 mode) {
  u64* x;
  u64 in0, in1; /* current input block as two big-endian words */
  u64 i;

  while (len >= RATE) {
    in0 = 0;
    in1 = 0;
    for (i = 0; i < 8; ++i) {
      in0 |= INS_BYTE64(in[i], i);
      in1 |= INS_BYTE64(in[i + 8], i);
    }
    s->x0 ^= in0;
    s->x1 ^= in1;
    if (mode != ASCON_AD) {
      for (i = 0; i < 8; ++i) {
        out[i] = EXT_BYTE64(s->x0, i);
        out[i + 8] = EXT_BYTE64(s->x1, i);
      }
      /* only advance a real output pointer (it is null in AD mode) */
      out += RATE;
    }
    if (mode == ASCON_DEC) {
      /* the rate becomes the ciphertext block loaded above */
      s->x0 = in0;
      s->x1 = in1;
    }
    P(s, PB_ROUNDS);
    in += RATE;
    len -= RATE;
  }

  /* final (possibly empty) block, one byte at a time */
  for (i = 0; i < len; ++i) {
    const u64 byte = INS_BYTE64(in[i], i % 8);
    x = (i < 8) ? &(s->x0) : &(s->x1);
    *x ^= byte;
    if (mode != ASCON_AD) out[i] = EXT_BYTE64(*x, i % 8);
    if (mode == ASCON_DEC) {
      *x &= ~INS_BYTE64(0xff, i % 8);
      *x |= byte;
    }
  }

  /* padding byte directly after the last data byte */
  if (len < 8)
    s->x0 ^= INS_BYTE64(0x80, len);
  else
    s->x1 ^= INS_BYTE64(0x80, len % 8);
}
/*
 * Runs the complete AEAD state machine on *s: initialization with key and
 * nonce, absorption of the associated data, processing of `tlen` bytes from
 * `in` to `out` according to `mode` (ASCON_ENC or ASCON_DEC), and
 * finalization.
 *
 * On return s->x3 and s->x4 hold the two tag words (callers read the tag
 * from there). `k` and `npub` must each point to 16 readable bytes.
 *
 * Key and nonce are loaded byte by byte (first byte most significant), so
 * the buffers need no alignment and are not accessed through u64 pointers.
 */
void ascon_core(state* s, unsigned char* out, const unsigned char* in,
                unsigned long long tlen, const unsigned char* ad,
                unsigned long long adlen, const unsigned char* npub,
                const unsigned char* k, u8 mode) {
  u64 K0 = 0, K1 = 0, N0 = 0, N1 = 0;
  u64 i;

  // load key and nonce
  for (i = 0; i < 8; ++i) {
    K0 |= INS_BYTE64(k[i], i);
    K1 |= INS_BYTE64(k[i + 8], i);
    N0 |= INS_BYTE64(npub[i], i);
    N1 |= INS_BYTE64(npub[i + 8], i);
  }

  // initialization
  s->x0 = IV;
  s->x1 = K0;
  s->x2 = K1;
  s->x3 = N0;
  s->x4 = N1;
  P(s, PA_ROUNDS);
  s->x3 ^= K0;
  s->x4 ^= K1;

  // process associated data (skipped entirely when there is none)
  if (adlen) {
    process_data(s, (void*)0, ad, adlen, ASCON_AD);
    P(s, PB_ROUNDS);
  }
  // flip the lowest bit of x4 before the payload
  s->x4 ^= 1;

  // process plaintext/ciphertext
  process_data(s, out, in, tlen, mode);

  // finalization
  s->x2 ^= K0;
  s->x3 ^= K1;
  P(s, PA_ROUNDS);
  s->x3 ^= K0;
  s->x4 ^= K1;
}
// Shared declarations for the AEAD core: mode selectors, variant parameters
// and the two functions implemented in the core source file.
#ifndef CORE_H_
#define CORE_H_
#include "api.h"
#include "endian.h"
#include "permutations.h"
// Values of the `mode` argument of process_data() and ascon_core().
#define ASCON_AD 0
#define ASCON_ENC 1
#define ASCON_DEC 2
// Number of bytes absorbed per block in process_data().
#define RATE (128 / 8)
// Round counts passed to P(): PA_ROUNDS around initialization/finalization,
// PB_ROUNDS after each data block.
#define PA_ROUNDS 12
#define PB_ROUNDS 8
// Initial value of state word x0: key size in bits, rate in bits, PA_ROUNDS
// and PB_ROUNDS, one byte each in the upper 32 bits; the lower 32 bits are 0.
#define IV \
((u64)(8 * (CRYPTO_KEYBYTES)) << 56 | (u64)(8 * (RATE)) << 48 | \
(u64)(PA_ROUNDS) << 40 | (u64)(PB_ROUNDS) << 32)
// Absorbs `len` bytes of `in` into the state; for modes other than ASCON_AD
// it also writes `len` bytes to `out`.
void process_data(state* s, unsigned char* out, const unsigned char* in,
unsigned long long len, u8 mode);
// Runs initialization, associated-data and payload processing, and
// finalization; the tag words are left in s->x3 and s->x4.
void ascon_core(state* s, unsigned char* out, const unsigned char* in,
unsigned long long tlen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k, u8 mode);
#endif // CORE_H_
#include "core.h"
int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen,
unsigned char* nsec, const unsigned char* c,
unsigned long long clen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k) {
if (clen < CRYPTO_ABYTES) {
*mlen = 0;
return -1;
}
state s;
(void)nsec;
// set plaintext size
*mlen = clen - CRYPTO_ABYTES;
ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DEC);
// verify tag (should be constant time, check compiler output)
if (((s.x3 ^ U64BIG(*(u64*)(c + *mlen))) |
(s.x4 ^ U64BIG(*(u64*)(c + *mlen + 8)))) != 0) {
*mlen = 0;
return -1;
}
return 0;
}
#include "core.h"
int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen,
const unsigned char* m, unsigned long long mlen,
const unsigned char* ad, unsigned long long adlen,
const unsigned char* nsec, const unsigned char* npub,
const unsigned char* k) {
state s;
(void)nsec;
// set ciphertext size
*clen = mlen + CRYPTO_ABYTES;
ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENC);
// set tag
*(u64*)(c + mlen) = U64BIG(s.x3);
*(u64*)(c + mlen + 8) = U64BIG(s.x4);
return 0;
}
// Byte-order helpers: UxxBIG(x) converts a value between host byte order and
// big-endian order (the conversion is its own inverse).
#ifndef ENDIAN_H_
#define ENDIAN_H_
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
// macros for big endian machines: nothing to do
#define U64BIG(x) (x)
#define U32BIG(x) (x)
#define U16BIG(x) (x)
#elif defined(_MSC_VER) || \
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
// macros for little endian machines (any _MSC_VER build is treated as little
// endian): reverse the byte order. Each macro evaluates x several times, so
// the argument must be free of side effects.
#define U64BIG(x) \
((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) | \
(((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) | \
(((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) | \
(((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56))
// U32BIG and U16BIG are not used by the code visible here.
#define U32BIG(x) \
((((x)&0x000000FF) << 24) | (((x)&0x0000FF00) << 8) | \
(((x)&0x00FF0000) >> 8) | (((x)&0xFF000000) >> 24))
#define U16BIG(x) ((((x)&0x00FF) << 8) | (((x)&0xFF00) >> 8))
#else
// Neither branch matched: fail the build instead of guessing the byte order.
#error "ascon byte order macros not defined in endian.h"
#endif
#endif // ENDIAN_H_
#include "permutations.h"
/*
 * Applies `rounds` permutation rounds to *p.
 * The state is copied into the local `s` (the name ROUND operates on). The
 * round constant starts at START_CONSTANT(rounds) and decreases by 0x0f per
 * round; the loop stops once it is no longer greater than 0x4a, i.e. the
 * last constant used is 0x4b. The result is copied back to *p.
 */
void P(state *p, u8 rounds) {
  state s = *p;
  u8 rc = START_CONSTANT(rounds);

  while (rc > 0x4a) {
    ROUND(rc);
    rc -= 0x0f;
  }
  *p = s;
}
#ifndef PERMUTATIONS_H_
#define PERMUTATIONS_H_
typedef unsigned char u8;
typedef unsigned long long u64;
typedef struct {
u64 x0, x1, x2, x3, x4;
} state;
#define EXT_BYTE64(x, n) ((u8)((u64)(x) >> (8 * (7 - (n)))))
#define INS_BYTE64(x, n) ((u64)(x) << (8 * (7 - (n))))
#define ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n))))
#define START_CONSTANT(x) (((0xf - (12 - (x))) << 4) | (12 - (x)))
#define ROUND(C) \
do { \
state t; \
s.x2 ^= C; \
s.x0 ^= s.x4; \
s.x4 ^= s.x3; \
s.x2 ^= s.x1; \
t.x0 = s.x0; \
t.x4 = s.x4; \
t.x3 = s.x3; \
t.x1 = s.x1; \
t.x2 = s.x2; \
s.x0 = t.x0 ^ ((~t.x1) & t.x2); \
s.x2 = t.x2 ^ ((~t.x3) & t.x4); \
s.x4 = t.x4 ^ ((~t.x0) & t.x1); \
s.x1 = t.x1 ^ ((~t.x2) & t.x3); \
s.x3 = t.x3 ^ ((~t.x4) & t.x0); \
s.x1 ^= s.x0; \
t.x1 = s.x1; \
s.x1 = ROTR64(s.x1, 39); \
s.x3 ^= s.x2; \
t.x2 = s.x2; \
s.x2 = ROTR64(s.x2, 1); \
t.x4 = s.x4; \
t.x2 ^= s.x2; \
s.x2 = ROTR64(s.x2, 6 - 1); \
t.x3 = s.x3; \
t.x1 ^= s.x1; \
s.x3 = ROTR64(s.x3, 10); \
s.x0 ^= s.x4; \
s.x4 = ROTR64(s.x4, 7); \
t.x3 ^= s.x3; \
s.x2 ^= t.x2; \
s.x1 = ROTR64(s.x1, 61 - 39); \
t.x0 = s.x0; \
s.x2 = ~s.x2; \
s.x3 = ROTR64(s.x3, 17 - 10); \
t.x4 ^= s.x4; \
s.x4 = ROTR64(s.x4, 41 - 7); \
s.x3 ^= t.x3; \
s.x1 ^= t.x1; \
s.x0 = ROTR64(s.x0, 19); \
s.x4 ^= t.x4; \
t.x0 ^= s.x0; \
s.x0 = ROTR64(s.x0, 28 - 19); \
s.x0 ^= t.x0; \
} while (0)
void P(state *p, u8 rounds);
#endif // PERMUTATIONS_H_
// Size parameters of this AEAD variant, in bytes.
// Key length: ascon_core() reads 16 bytes from k and the IV encodes
// 8 * CRYPTO_KEYBYTES.
#define CRYPTO_KEYBYTES 16
// No secret message number: the nsec arguments are ignored.
#define CRYPTO_NSECBYTES 0
// Public nonce length: ascon_core() reads 16 bytes from npub.
#define CRYPTO_NPUBBYTES 16
// Tag length appended to the ciphertext (clen = mlen + CRYPTO_ABYTES).
#define CRYPTO_ABYTES 16
// NOTE(review): not referenced by the code visible here; presumably tells the
// test/benchmark harness that input and output buffers must not overlap.
#define CRYPTO_NOOVERLAP 1
#include "core.h"
/*
 * Absorbs `len` bytes from `in` into the rate word x0 of the
 * bit-interleaved state, RATE (8) bytes per block.
 *
 *   ASCON_AD  - input is only XORed into the state; `out` is never
 *               dereferenced or advanced (ascon_core passes a null pointer).
 *   ASCON_ENC - additionally writes state ^ input to `out`.
 *   ASCON_DEC - writes state ^ input to `out`, then replaces the processed
 *               rate bytes with the input bytes.
 *
 * P(s, PB_ROUNDS) is applied after every full block. The remaining
 * 0..RATE-1 bytes are padded with one 0x80 byte and absorbed without a
 * permutation call afterwards.
 *
 * All buffer accesses are byte-wise (first byte = most significant byte of
 * the word), so `in`/`out` need no alignment and are never accessed through
 * u64 pointers. Each block is read completely before its output is written.
 */
void process_data(state* s, unsigned char* out, const unsigned char* in,
                  unsigned long long len, u8 mode) {
  u32_2 t0;
  u64 in0;  /* current input block as a big-endian word */
  u64 tmp0; /* rate word converted back from bit-interleaved form */
  u64 i;

  while (len >= RATE) {
    in0 = 0;
    for (i = 0; i < 8; ++i) in0 |= INS_BYTE64(in[i], i);
    t0 = to_bit_interleaving(in0);
    s->x0.e ^= t0.e;
    s->x0.o ^= t0.o;
    if (mode != ASCON_AD) {
      tmp0 = from_bit_interleaving(s->x0);
      for (i = 0; i < 8; ++i) out[i] = EXT_BYTE64(tmp0, i);
      /* only advance a real output pointer (it is null in AD mode) */
      out += RATE;
    }
    /* the rate becomes the ciphertext block */
    if (mode == ASCON_DEC) s->x0 = t0;
    P(s, PB_ROUNDS);
    in += RATE;
    len -= RATE;
  }

  /* final (possibly empty) block: remaining bytes followed by 0x80 */
  in0 = 0;
  for (i = 0; i < len; ++i) in0 |= INS_BYTE64(in[i], i);
  in0 |= INS_BYTE64(0x80, len);
  t0 = to_bit_interleaving(in0);
  s->x0.e ^= t0.e;
  s->x0.o ^= t0.o;

  if (mode != ASCON_AD) {
    tmp0 = from_bit_interleaving(s->x0);
    for (i = 0; i < len; ++i) out[i] = EXT_BYTE64(tmp0, i);
    if (mode == ASCON_DEC) {
      /* overwrite the first len rate bytes with the input bytes, taken from
         the word loaded above rather than re-read from `in` */
      for (i = 0; i < len; ++i) {
        const u64 mask = INS_BYTE64(0xff, i);
        tmp0 = (tmp0 & ~mask) | (in0 & mask);
      }
      s->x0 = to_bit_interleaving(tmp0);
    }
  }
}
/*
 * Runs the complete AEAD state machine on the bit-interleaved state *s:
 * initialization with key and nonce, absorption of the associated data,
 * processing of `tlen` bytes from `in` to `out` according to `mode`
 * (ASCON_ENC or ASCON_DEC), and finalization.
 *
 * On return s->x3 and s->x4 hold the two tag words (callers read the tag
 * from there). `k` and `npub` must each point to 16 readable bytes.
 *
 * Key and nonce are loaded byte by byte (first byte most significant), so
 * the buffers need no alignment and are not accessed through u64 pointers.
 */
void ascon_core(state* s, unsigned char* out, const unsigned char* in,
                unsigned long long tlen, const unsigned char* ad,
                unsigned long long adlen, const unsigned char* npub,
                const unsigned char* k, u8 mode) {
  u32_2 K0, K1, N0, N1;
  u64 k0 = 0, k1 = 0, n0 = 0, n1 = 0;
  u64 i;

  // load key and nonce
  for (i = 0; i < 8; ++i) {
    k0 |= INS_BYTE64(k[i], i);
    k1 |= INS_BYTE64(k[i + 8], i);
    n0 |= INS_BYTE64(npub[i], i);
    n1 |= INS_BYTE64(npub[i + 8], i);
  }
  K0 = to_bit_interleaving(k0);
  K1 = to_bit_interleaving(k1);
  N0 = to_bit_interleaving(n0);
  N1 = to_bit_interleaving(n1);

  // initialization
  s->x0 = to_bit_interleaving(IV);
  s->x1.e = K0.e;
  s->x1.o = K0.o;
  s->x2.e = K1.e;
  s->x2.o = K1.o;
  s->x3.e = N0.e;
  s->x3.o = N0.o;
  s->x4.e = N1.e;
  s->x4.o = N1.o;
  P(s, PA_ROUNDS);
  s->x3.e ^= K0.e;
  s->x3.o ^= K0.o;
  s->x4.e ^= K1.e;
  s->x4.o ^= K1.o;

  // process associated data (skipped entirely when there is none)
  if (adlen) {
    process_data(s, (void*)0, ad, adlen, ASCON_AD);
    P(s, PB_ROUNDS);
  }
  // flip the lowest bit of x4 (bit 0 of the even half) before the payload
  s->x4.e ^= 1;

  // process plaintext/ciphertext
  process_data(s, out, in, tlen, mode);

  // finalization
  s->x1.e ^= K0.e;
  s->x1.o ^= K0.o;
  s->x2.e ^= K1.e;
  s->x2.o ^= K1.o;
  P(s, PA_ROUNDS);
  s->x3.e ^= K0.e;
  s->x3.o ^= K0.o;
  s->x4.e ^= K1.e;
  s->x4.o ^= K1.o;
}
// Shared declarations for the AEAD core: mode selectors, variant parameters
// and the two functions implemented in the core source file.
#ifndef CORE_H_
#define CORE_H_
#include "api.h"
#include "endian.h"
#include "permutations.h"
// Values of the `mode` argument of process_data() and ascon_core().
#define ASCON_AD 0
#define ASCON_ENC 1
#define ASCON_DEC 2
// Number of bytes absorbed per block in process_data().
#define RATE (64 / 8)
// Round counts passed to P(): PA_ROUNDS around initialization/finalization,
// PB_ROUNDS after each data block.
#define PA_ROUNDS 12
#define PB_ROUNDS 6
// Initial value of state word x0: key size in bits, rate in bits, PA_ROUNDS
// and PB_ROUNDS, one byte each in the upper 32 bits; the lower 32 bits are 0.
#define IV \
((u64)(8 * (CRYPTO_KEYBYTES)) << 56 | (u64)(8 * (RATE)) << 48 | \
(u64)(PA_ROUNDS) << 40 | (u64)(PB_ROUNDS) << 32)
// Absorbs `len` bytes of `in` into the state; for modes other than ASCON_AD
// it also writes `len` bytes to `out`.
void process_data(state* s, unsigned char* out, const unsigned char* in,
unsigned long long len, u8 mode);
// Runs initialization, associated-data and payload processing, and
// finalization; the tag words are left in s->x3 and s->x4.
void ascon_core(state* s, unsigned char* out, const unsigned char* in,
unsigned long long tlen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k, u8 mode);
#endif // CORE_H_
#include "core.h"
int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen,
unsigned char* nsec, const unsigned char* c,
unsigned long long clen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k) {
if (clen < CRYPTO_ABYTES) {
*mlen = 0;
return -1;
}
state s;
u32_2 t0, t1;
(void)nsec;
// set plaintext size
*mlen = clen - CRYPTO_ABYTES;
ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DEC);
// verify tag (should be constant time, check compiler output)
t0 = to_bit_interleaving(U64BIG(*(u64*)(c + *mlen)));
t1 = to_bit_interleaving(U64BIG(*(u64*)(c + *mlen + 8)));
if (((s.x3.e ^ t0.e) | (s.x3.o ^ t0.o) | (s.x4.e ^ t1.e) | (s.x4.o ^ t1.o)) !=
0) {
*mlen = 0;
return -1;
}
return 0;
}
#include "core.h"
/*
 * Encrypts `mlen` bytes from `m` into `c` and appends the CRYPTO_ABYTES tag,
 * so `c` must have room for mlen + CRYPTO_ABYTES bytes; that total is stored
 * in *clen. Always returns 0. `nsec` is unused.
 *
 * The tag is written byte by byte (most significant byte first), so
 * `c + mlen` needs no alignment and is not accessed through a u64 pointer.
 */
int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen,
                        const unsigned char* m, unsigned long long mlen,
                        const unsigned char* ad, unsigned long long adlen,
                        const unsigned char* nsec, const unsigned char* npub,
                        const unsigned char* k) {
  state s;
  u64 tmp0, tmp1;
  unsigned char* tag;
  u64 i;
  (void)nsec;

  // set ciphertext size
  *clen = mlen + CRYPTO_ABYTES;
  ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENC);

  // set tag
  tmp0 = from_bit_interleaving(s.x3);
  tmp1 = from_bit_interleaving(s.x4);
  tag = c + mlen;
  for (i = 0; i < 8; ++i) {
    tag[i] = EXT_BYTE64(tmp0, i);
    tag[i + 8] = EXT_BYTE64(tmp1, i);
  }
  return 0;
}
// Byte-order helpers: UxxBIG(x) converts a value between host byte order and
// big-endian order (the conversion is its own inverse).
#ifndef ENDIAN_H_
#define ENDIAN_H_
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
// macros for big endian machines: nothing to do
#define U64BIG(x) (x)
#define U32BIG(x) (x)
#define U16BIG(x) (x)
#elif defined(_MSC_VER) || \
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
// macros for little endian machines (any _MSC_VER build is treated as little
// endian): reverse the byte order. Each macro evaluates x several times, so
// the argument must be free of side effects.
#define U64BIG(x) \
((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) | \
(((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) | \
(((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) | \
(((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56))
// U32BIG and U16BIG are not used by the code visible here.
#define U32BIG(x) \
((((x)&0x000000FF) << 24) | (((x)&0x0000FF00) << 8) | \
(((x)&0x00FF0000) >> 8) | (((x)&0xFF000000) >> 24))
#define U16BIG(x) ((((x)&0x00FF) << 8) | (((x)&0xFF00) >> 8))
#else
// Neither branch matched: fail the build instead of guessing the byte order.
#error "ascon byte order macros not defined in endian.h"
#endif
#endif // ENDIAN_H_
#include "permutations.h"
// Round constants for the 12 rounds, one {C_e, C_o} pair per round. P() walks
// this table from index START_ROUND(rounds) to 11 and ROUND XORs the first
// element into x2.e and the second into x2.o.
static const u8 constants[][2] = {
{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9}, {0x6, 0xc}, {0x3, 0xc},
{0x6, 0x9}, {0x3, 0x9}, {0xc, 0x6}, {0x9, 0x6}, {0xc, 0x3}, {0x9, 0x3}};
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
/*
 * Splits a 64-bit word into its bit-interleaved form: the even-indexed bits
 * (bit 0, 2, ...) are gathered in .e and the odd-indexed bits in .o.
 * Each 32-bit half is unshuffled with four masked swap steps so that its
 * even bits land in the low 16 bits and its odd bits in the high 16 bits;
 * the halves are then recombined.
 */
u32_2 to_bit_interleaving(u64 in) {
  u32 upper = (in) >> 32;
  u32 lower = (u32)(in);
  u32 swap_lo, swap_hi;
  u32_2 result;

  /* unshuffle the low word */
  swap_lo = (lower ^ (lower >> 1)) & 0x22222222;
  lower ^= swap_lo ^ (swap_lo << 1);
  swap_lo = (lower ^ (lower >> 2)) & 0x0C0C0C0C;
  lower ^= swap_lo ^ (swap_lo << 2);
  swap_lo = (lower ^ (lower >> 4)) & 0x00F000F0;
  lower ^= swap_lo ^ (swap_lo << 4);
  swap_lo = (lower ^ (lower >> 8)) & 0x0000FF00;
  lower ^= swap_lo ^ (swap_lo << 8);

  /* unshuffle the high word the same way */
  swap_hi = (upper ^ (upper >> 1)) & 0x22222222;
  upper ^= swap_hi ^ (swap_hi << 1);
  swap_hi = (upper ^ (upper >> 2)) & 0x0C0C0C0C;
  upper ^= swap_hi ^ (swap_hi << 2);
  swap_hi = (upper ^ (upper >> 4)) & 0x00F000F0;
  upper ^= swap_hi ^ (swap_hi << 4);
  swap_hi = (upper ^ (upper >> 8)) & 0x0000FF00;
  upper ^= swap_hi ^ (swap_hi << 8);

  /* low 16 bits of each word -> .e, high 16 bits -> .o */
  result.e = (lower & 0x0000FFFF) | (upper << 16);
  result.o = (lower >> 16) | (upper & 0xFFFF0000);
  return result;
}
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
/*
 * Inverse of to_bit_interleaving(): rebuilds the 64-bit word whose
 * even-indexed bits are in.e and whose odd-indexed bits are in.o.
 * The 16-bit halves are regrouped per 32-bit word first, then each word is
 * shuffled with the four masked swap steps in the reverse order.
 */
u64 from_bit_interleaving(u32_2 in) {
  u32 lower = (in.e & 0x0000FFFF) | (in.o << 16);
  u32 upper = (in.e >> 16) | (in.o & 0xFFFF0000);
  u32 swap_lo, swap_hi;
  u64 result;

  /* shuffle the low word */
  swap_lo = (lower ^ (lower >> 8)) & 0x0000FF00;
  lower ^= swap_lo ^ (swap_lo << 8);
  swap_lo = (lower ^ (lower >> 4)) & 0x00F000F0;
  lower ^= swap_lo ^ (swap_lo << 4);
  swap_lo = (lower ^ (lower >> 2)) & 0x0C0C0C0C;
  lower ^= swap_lo ^ (swap_lo << 2);
  swap_lo = (lower ^ (lower >> 1)) & 0x22222222;
  lower ^= swap_lo ^ (swap_lo << 1);

  /* shuffle the high word the same way */
  swap_hi = (upper ^ (upper >> 8)) & 0x0000FF00;
  upper ^= swap_hi ^ (swap_hi << 8);
  swap_hi = (upper ^ (upper >> 4)) & 0x00F000F0;
  upper ^= swap_hi ^ (swap_hi << 4);
  swap_hi = (upper ^ (upper >> 2)) & 0x0C0C0C0C;
  upper ^= swap_hi ^ (swap_hi << 2);
  swap_hi = (upper ^ (upper >> 1)) & 0x22222222;
  upper ^= swap_hi ^ (swap_hi << 1);

  result = (u64)upper << 32 | lower;
  return result;
}
/*
 * Applies the last `rounds` of the 12 permutation rounds to *p.
 * The state is copied into the local `s` (the name ROUND operates on, along
 * with the temporaries t0..t4), ROUND runs with the constants at indices
 * START_ROUND(rounds)..11, and the result is copied back.
 */
void P(state *p, u8 rounds) {
  state s = *p;
  u32_2 t0, t1, t2, t3, t4;
  u32 idx = START_ROUND(rounds);

  while (idx < 12) {
    ROUND(constants[idx][0], constants[idx][1]);
    idx++;
  }
  *p = s;
}
// Types, byte helpers and the round macro of the bit-interleaved permutation.
#ifndef PERMUTATIONS_H_
#define PERMUTATIONS_H_
typedef unsigned char u8;
// NOTE(review): ROTR32 and the interleaving code rely on unsigned int being
// exactly 32 bits wide.
typedef unsigned int u32;
typedef unsigned long long u64;
// A 64-bit word in bit-interleaved form: e holds the even-indexed bits,
// o the odd-indexed bits (see to_bit_interleaving()).
typedef struct {
u32 e;
u32 o;
} u32_2;
// Permutation state: five bit-interleaved 64-bit words.
typedef struct {
u32_2 x0;
u32_2 x1;
u32_2 x2;
u32_2 x3;
u32_2 x4;
} state;
// Byte n of the 64-bit value x, where n = 0 is the most significant byte.
#define EXT_BYTE64(x, n) ((u8)((u64)(x) >> (8 * (7 - (n)))))
// x moved to byte position n of a 64-bit value (n = 0 is the most
// significant byte).
#define INS_BYTE64(x, n) ((u64)(x) << (8 * (7 - (n))))
// 32-bit right rotation; n must be in 1..31 (a shift by 32 is undefined).
#define ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
// Index of the first entry of the round-constant table when x rounds are run.
#define START_ROUND(x) (12 - (x))
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
u32_2 to_bit_interleaving(u64 in);
// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
u64 from_bit_interleaving(u32_2 in);
/* clang-format off */
/*
 * One permutation round. Operates on a variable `s` of type state and uses
 * the u32_2 temporaries t0..t4; all of them must be declared in the
 * enclosing scope. C_e and C_o are XORed into x2.e and x2.o.
 * In the linear layer every 64-bit rotation is expressed as 32-bit rotations
 * of the even and odd halves; x2 is complemented as the last step.
 */
#define ROUND(C_e, C_o) \
do { \
/* round constant */ \
s.x2.e ^= C_e; s.x2.o ^= C_o; \
/* s-box layer */ \
s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \
s.x4.e ^= s.x3.e; s.x4.o ^= s.x3.o; \
s.x2.e ^= s.x1.e; s.x2.o ^= s.x1.o; \
t0.e = s.x0.e; t0.o = s.x0.o; \
t4.e = s.x4.e; t4.o = s.x4.o; \
t3.e = s.x3.e; t3.o = s.x3.o; \
t1.e = s.x1.e; t1.o = s.x1.o; \
t2.e = s.x2.e; t2.o = s.x2.o; \
s.x0.e = t0.e ^ (~t1.e & t2.e); s.x0.o = t0.o ^ (~t1.o & t2.o); \
s.x2.e = t2.e ^ (~t3.e & t4.e); s.x2.o = t2.o ^ (~t3.o & t4.o); \
s.x4.e = t4.e ^ (~t0.e & t1.e); s.x4.o = t4.o ^ (~t0.o & t1.o); \
s.x1.e = t1.e ^ (~t2.e & t3.e); s.x1.o = t1.o ^ (~t2.o & t3.o); \
s.x3.e = t3.e ^ (~t4.e & t0.e); s.x3.o = t3.o ^ (~t4.o & t0.o); \
s.x1.e ^= s.x0.e; s.x1.o ^= s.x0.o; \
s.x3.e ^= s.x2.e; s.x3.o ^= s.x2.o; \
s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \
/* linear layer */ \
t0.e = s.x0.e ^ ROTR32(s.x0.o, 4); t0.o = s.x0.o ^ ROTR32(s.x0.e, 5); \
t1.e = s.x1.e ^ ROTR32(s.x1.e, 11); t1.o = s.x1.o ^ ROTR32(s.x1.o, 11); \
t2.e = s.x2.e ^ ROTR32(s.x2.o, 2); t2.o = s.x2.o ^ ROTR32(s.x2.e, 3); \
t3.e = s.x3.e ^ ROTR32(s.x3.o, 3); t3.o = s.x3.o ^ ROTR32(s.x3.e, 4); \
t4.e = s.x4.e ^ ROTR32(s.x4.e, 17); t4.o = s.x4.o ^ ROTR32(s.x4.o, 17); \
s.x0.e ^= ROTR32(t0.o, 9); s.x0.o ^= ROTR32(t0.e, 10); \
s.x1.e ^= ROTR32(t1.o, 19); s.x1.o ^= ROTR32(t1.e, 20); \
s.x2.e ^= t2.o; s.x2.o ^= ROTR32(t2.e, 1); \
s.x3.e ^= ROTR32(t3.e, 5); s.x3.o ^= ROTR32(t3.o, 5); \
s.x4.e ^= ROTR32(t4.o, 3); s.x4.o ^= ROTR32(t4.e, 4); \
s.x2.e = ~s.x2.e; s.x2.o = ~s.x2.o; \
} while(0)
/* clang-format on */
// Applies the last `rounds` of the 12 permutation rounds to *p.
void P(state *p, u8 rounds);
#endif // PERMUTATIONS_H_
// Size parameters of this AEAD variant, in bytes.
// Key length: ascon_core() reads 16 bytes from k and the IV encodes
// 8 * CRYPTO_KEYBYTES.
#define CRYPTO_KEYBYTES 16
// No secret message number: the nsec arguments are ignored.
#define CRYPTO_NSECBYTES 0
// Public nonce length: ascon_core() reads 16 bytes from npub.
#define CRYPTO_NPUBBYTES 16
// Tag length appended to the ciphertext (clen = mlen + CRYPTO_ABYTES).
#define CRYPTO_ABYTES 16
// NOTE(review): not referenced by the code visible here; presumably tells the
// test/benchmark harness that input and output buffers must not overlap.
#define CRYPTO_NOOVERLAP 1
#include "core.h"
/*
 * Absorbs `len` bytes from `in` into the rate word x0 of the state,
 * RATE (8) bytes per block.
 *
 *   ASCON_AD  - input is only XORed into the state; `out` is never
 *               dereferenced or advanced (ascon_core passes a null pointer).
 *   ASCON_ENC - additionally writes state ^ input to `out`.
 *   ASCON_DEC - writes state ^ input to `out`, then replaces the processed
 *               rate bytes with the input bytes.
 *
 * P(s, PB_ROUNDS) is applied after every full block. The remaining
 * 0..RATE-1 bytes are absorbed, followed by a single 0x80 padding byte,
 * without a permutation call afterwards.
 *
 * All buffer accesses are byte-wise (first byte = most significant byte of
 * the word), so `in`/`out` need no alignment and are never accessed through
 * u64 pointers. Input is always read before the matching output is written
 * and is not read a second time.
 */
void process_data(state* s, unsigned char* out, const unsigned char* in,
                  unsigned long long len, u8 mode) {
  u64 in0; /* current input block as a big-endian word */
  u64 i;

  while (len >= RATE) {
    in0 = 0;
    for (i = 0; i < 8; ++i) in0 |= INS_BYTE64(in[i], i);
    s->x0 ^= in0;
    if (mode != ASCON_AD) {
      for (i = 0; i < 8; ++i) out[i] = EXT_BYTE64(s->x0, i);
      /* only advance a real output pointer (it is null in AD mode) */
      out += RATE;
    }
    /* the rate becomes the ciphertext block loaded above */
    if (mode == ASCON_DEC) s->x0 = in0;
    P(s, PB_ROUNDS);
    in += RATE;
    len -= RATE;
  }

  /* final (possibly empty) block, one byte at a time */
  for (i = 0; i < len; ++i) {
    const u64 byte = INS_BYTE64(in[i], i);
    s->x0 ^= byte;
    if (mode != ASCON_AD) out[i] = EXT_BYTE64(s->x0, i);
    if (mode == ASCON_DEC) {
      s->x0 &= ~INS_BYTE64(0xff, i);
      s->x0 |= byte;
    }
  }

  /* padding byte directly after the last data byte */
  s->x0 ^= INS_BYTE64(0x80, len);
}
/*
 * Runs the complete AEAD state machine on *s: initialization with key and
 * nonce, absorption of the associated data, processing of `tlen` bytes from
 * `in` to `out` according to `mode` (ASCON_ENC or ASCON_DEC), and
 * finalization.
 *
 * On return s->x3 and s->x4 hold the two tag words (callers read the tag
 * from there). `k` and `npub` must each point to 16 readable bytes.
 *
 * Key and nonce are loaded byte by byte (first byte most significant), so
 * the buffers need no alignment and are not accessed through u64 pointers.
 */
void ascon_core(state* s, unsigned char* out, const unsigned char* in,
                unsigned long long tlen, const unsigned char* ad,
                unsigned long long adlen, const unsigned char* npub,
                const unsigned char* k, u8 mode) {
  u64 K0 = 0, K1 = 0, N0 = 0, N1 = 0;
  u64 i;

  // load key and nonce
  for (i = 0; i < 8; ++i) {
    K0 |= INS_BYTE64(k[i], i);
    K1 |= INS_BYTE64(k[i + 8], i);
    N0 |= INS_BYTE64(npub[i], i);
    N1 |= INS_BYTE64(npub[i + 8], i);
  }

  // initialization
  s->x0 = IV;
  s->x1 = K0;
  s->x2 = K1;
  s->x3 = N0;
  s->x4 = N1;
  P(s, PA_ROUNDS);
  s->x3 ^= K0;
  s->x4 ^= K1;

  // process associated data (skipped entirely when there is none)
  if (adlen) {
    process_data(s, (void*)0, ad, adlen, ASCON_AD);
    P(s, PB_ROUNDS);
  }
  // flip the lowest bit of x4 before the payload
  s->x4 ^= 1;

  // process plaintext/ciphertext
  process_data(s, out, in, tlen, mode);

  // finalization
  s->x1 ^= K0;
  s->x2 ^= K1;
  P(s, PA_ROUNDS);
  s->x3 ^= K0;
  s->x4 ^= K1;
}
// Shared declarations for the AEAD core: mode selectors, variant parameters
// and the two functions implemented in the core source file.
#ifndef CORE_H_
#define CORE_H_
#include "api.h"
#include "endian.h"
#include "permutations.h"
// Values of the `mode` argument of process_data() and ascon_core().
#define ASCON_AD 0
#define ASCON_ENC 1
#define ASCON_DEC 2
// Number of bytes absorbed per block in process_data().
#define RATE (64 / 8)
// Round counts passed to P(): PA_ROUNDS around initialization/finalization,
// PB_ROUNDS after each data block.
#define PA_ROUNDS 12
#define PB_ROUNDS 6
// Initial value of state word x0: key size in bits, rate in bits, PA_ROUNDS
// and PB_ROUNDS, one byte each in the upper 32 bits; the lower 32 bits are 0.
#define IV \
((u64)(8 * (CRYPTO_KEYBYTES)) << 56 | (u64)(8 * (RATE)) << 48 | \
(u64)(PA_ROUNDS) << 40 | (u64)(PB_ROUNDS) << 32)
// Absorbs `len` bytes of `in` into the state; for modes other than ASCON_AD
// it also writes `len` bytes to `out`.
void process_data(state* s, unsigned char* out, const unsigned char* in,
unsigned long long len, u8 mode);
// Runs initialization, associated-data and payload processing, and
// finalization; the tag words are left in s->x3 and s->x4.
void ascon_core(state* s, unsigned char* out, const unsigned char* in,
unsigned long long tlen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k, u8 mode);
#endif // CORE_H_
#include "core.h"
int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen,
unsigned char* nsec, const unsigned char* c,
unsigned long long clen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k) {
if (clen < CRYPTO_ABYTES) {
*mlen = 0;
return -1;
}
state s;
(void)nsec;
// set plaintext size
*mlen = clen - CRYPTO_ABYTES;
ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DEC);
// verify tag (should be constant time, check compiler output)
if (((s.x3 ^ U64BIG(*(u64*)(c + *mlen))) |
(s.x4 ^ U64BIG(*(u64*)(c + *mlen + 8)))) != 0) {
*mlen = 0;
return -1;
}
return 0;
}
#include "core.h"
int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen,
const unsigned char* m, unsigned long long mlen,
const unsigned char* ad, unsigned long long adlen,
const unsigned char* nsec, const unsigned char* npub,
const unsigned char* k) {
state s;
(void)nsec;
// set ciphertext size
*clen = mlen + CRYPTO_ABYTES;
ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENC);
// set tag
*(u64*)(c + mlen) = U64BIG(s.x3);
*(u64*)(c + mlen + 8) = U64BIG(s.x4);
return 0;
}
// Byte-order helpers: UxxBIG(x) converts a value between host byte order and
// big-endian order (the conversion is its own inverse).
#ifndef ENDIAN_H_
#define ENDIAN_H_
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
// macros for big endian machines: nothing to do
#define U64BIG(x) (x)
#define U32BIG(x) (x)
#define U16BIG(x) (x)
#elif defined(_MSC_VER) || \
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
// macros for little endian machines (any _MSC_VER build is treated as little
// endian): reverse the byte order. Each macro evaluates x several times, so
// the argument must be free of side effects.
#define U64BIG(x) \
((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) | \
(((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) | \
(((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) | \
(((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56))
// U32BIG and U16BIG are not used by the code visible here.
#define U32BIG(x) \
((((x)&0x000000FF) << 24) | (((x)&0x0000FF00) << 8) | \
(((x)&0x00FF0000) >> 8) | (((x)&0xFF000000) >> 24))
#define U16BIG(x) ((((x)&0x00FF) << 8) | (((x)&0xFF00) >> 8))
#else
// Neither branch matched: fail the build instead of guessing the byte order.
#error "ascon byte order macros not defined in endian.h"
#endif
#endif // ENDIAN_H_
#include "permutations.h"
/*
 * Applies `rounds` permutation rounds to *p.
 * The state is copied into the local `s` (the name ROUND operates on). The
 * round constant starts at START_CONSTANT(rounds) and decreases by 0x0f per
 * round; the loop stops once it is no longer greater than 0x4a, i.e. the
 * last constant used is 0x4b. The result is copied back to *p.
 */
void P(state *p, u8 rounds) {
  state s = *p;
  u8 rc = START_CONSTANT(rounds);

  while (rc > 0x4a) {
    ROUND(rc);
    rc -= 0x0f;
  }
  *p = s;
}
#ifndef PERMUTATIONS_H_
#define PERMUTATIONS_H_
typedef unsigned char u8;
typedef unsigned long long u64;
typedef struct {
u64 x0, x1, x2, x3, x4;
} state;
#define EXT_BYTE64(x, n) ((u8)((u64)(x) >> (8 * (7 - (n)))))
#define INS_BYTE64(x, n) ((u64)(x) << (8 * (7 - (n))))
#define ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n))))
#define START_CONSTANT(x) (((0xf - (12 - (x))) << 4) | (12 - (x)))
#define ROUND(C) \
do { \
state t; \
s.x2 ^= C; \
s.x0 ^= s.x4; \
s.x4 ^= s.x3; \
s.x2 ^= s.x1; \
t.x0 = s.x0; \
t.x4 = s.x4; \
t.x3 = s.x3; \
t.x1 = s.x1; \
t.x2 = s.x2; \
s.x0 = t.x0 ^ ((~t.x1) & t.x2); \
s.x2 = t.x2 ^ ((~t.x3) & t.x4); \
s.x4 = t.x4 ^ ((~t.x0) & t.x1); \
s.x1 = t.x1 ^ ((~t.x2) & t.x3); \
s.x3 = t.x3 ^ ((~t.x4) & t.x0); \
s.x1 ^= s.x0; \
t.x1 = s.x1; \
s.x1 = ROTR64(s.x1, 39); \
s.x3 ^= s.x2; \
t.x2 = s.x2; \
s.x2 = ROTR64(s.x2, 1); \
t.x4 = s.x4; \
t.x2 ^= s.x2; \
s.x2 = ROTR64(s.x2, 6 - 1); \
t.x3 = s.x3; \
t.x1 ^= s.x1; \
s.x3 = ROTR64(s.x3, 10); \
s.x0 ^= s.x4; \
s.x4 = ROTR64(s.x4, 7); \
t.x3 ^= s.x3; \
s.x2 ^= t.x2; \
s.x1 = ROTR64(s.x1, 61 - 39); \
t.x0 = s.x0; \
s.x2 = ~s.x2; \
s.x3 = ROTR64(s.x3, 17 - 10); \
t.x4 ^= s.x4; \
s.x4 = ROTR64(s.x4, 41 - 7); \
s.x3 ^= t.x3; \
s.x1 ^= t.x1; \
s.x0 = ROTR64(s.x0, 19); \
s.x4 ^= t.x4; \
t.x0 ^= s.x0; \
s.x0 = ROTR64(s.x0, 28 - 19); \
s.x0 ^= t.x0; \
} while (0)
void P(state *p, u8 rounds);
#endif // PERMUTATIONS_H_
// Size parameters of this AEAD variant, in bytes.
// Key length: ascon_core() reads 20 bytes from k (a 32-bit part and two
// 64-bit words) and the IV encodes 8 * CRYPTO_KEYBYTES.
#define CRYPTO_KEYBYTES 20
// No secret message number: the nsec arguments are ignored.
#define CRYPTO_NSECBYTES 0
// Public nonce length: ascon_core() reads 16 bytes from npub.
#define CRYPTO_NPUBBYTES 16
// Tag length; decryption rejects inputs shorter than this and treats the
// last CRYPTO_ABYTES bytes of the ciphertext as the tag.
#define CRYPTO_ABYTES 16
// NOTE(review): not referenced by the code visible here; presumably tells the
// test/benchmark harness that input and output buffers must not overlap.
#define CRYPTO_NOOVERLAP 1
#include "core.h"
/*
 * Absorbs `len` bytes from `in` into the rate word x0 of the state,
 * RATE (8) bytes per block.
 *
 *   ASCON_AD  - input is only XORed into the state; `out` is never
 *               dereferenced or advanced (ascon_core passes a null pointer).
 *   ASCON_ENC - additionally writes state ^ input to `out`.
 *   ASCON_DEC - writes state ^ input to `out`, then replaces the processed
 *               rate bytes with the input bytes.
 *
 * P(s, PB_ROUNDS) is applied after every full block. The remaining
 * 0..RATE-1 bytes are absorbed, followed by a single 0x80 padding byte,
 * without a permutation call afterwards.
 *
 * All buffer accesses are byte-wise (first byte = most significant byte of
 * the word), so `in`/`out` need no alignment and are never accessed through
 * u64 pointers. Input is always read before the matching output is written
 * and is not read a second time.
 */
void process_data(state* s, unsigned char* out, const unsigned char* in,
                  unsigned long long len, u8 mode) {
  u64 in0; /* current input block as a big-endian word */
  u64 i;

  while (len >= RATE) {
    in0 = 0;
    for (i = 0; i < 8; ++i) in0 |= INS_BYTE64(in[i], i);
    s->x0 ^= in0;
    if (mode != ASCON_AD) {
      for (i = 0; i < 8; ++i) out[i] = EXT_BYTE64(s->x0, i);
      /* only advance a real output pointer (it is null in AD mode) */
      out += RATE;
    }
    /* the rate becomes the ciphertext block loaded above */
    if (mode == ASCON_DEC) s->x0 = in0;
    P(s, PB_ROUNDS);
    in += RATE;
    len -= RATE;
  }

  /* final (possibly empty) block, one byte at a time */
  for (i = 0; i < len; ++i) {
    const u64 byte = INS_BYTE64(in[i], i);
    s->x0 ^= byte;
    if (mode != ASCON_AD) out[i] = EXT_BYTE64(s->x0, i);
    if (mode == ASCON_DEC) {
      s->x0 &= ~INS_BYTE64(0xff, i);
      s->x0 |= byte;
    }
  }

  /* padding byte directly after the last data byte */
  s->x0 ^= INS_BYTE64(0x80, len);
}
/*
 * Runs the complete AEAD state machine on *s for the 20-byte-key variant:
 * initialization with key and nonce, absorption of the associated data,
 * processing of `tlen` bytes from `in` to `out` according to `mode`
 * (ASCON_ENC or ASCON_DEC), and finalization.
 *
 * The key is split into K0 (bytes 0..3, kept in the low 32 bits), K1
 * (bytes 4..11) and K2 (bytes 12..19). On return s->x3 and s->x4 hold the
 * two tag words (decryption compares the received tag against them). `k`
 * must point to 20 and `npub` to 16 readable bytes.
 *
 * Key and nonce are loaded byte by byte (first byte most significant), so
 * the buffers need no alignment and are not accessed through u64 pointers
 * (the previous u64 loads at k + 4 and k + 12 could never both be aligned
 * together with k itself).
 */
void ascon_core(state* s, unsigned char* out, const unsigned char* in,
                unsigned long long tlen, const unsigned char* ad,
                unsigned long long adlen, const unsigned char* npub,
                const unsigned char* k, u8 mode) {
  u64 K0 = 0, K1 = 0, K2 = 0, N0 = 0, N1 = 0;
  u64 i;

  // load key and nonce
  for (i = 0; i < 4; ++i) K0 |= (u64)k[i] << (8 * (3 - i));
  for (i = 0; i < 8; ++i) {
    K1 |= INS_BYTE64(k[i + 4], i);
    K2 |= INS_BYTE64(k[i + 12], i);
    N0 |= INS_BYTE64(npub[i], i);
    N1 |= INS_BYTE64(npub[i + 8], i);
  }

  // initialization: the low 32 bits of the IV word carry K0
  s->x0 = IV | K0;
  s->x1 = K1;
  s->x2 = K2;
  s->x3 = N0;
  s->x4 = N1;
  P(s, PA_ROUNDS);
  s->x2 ^= K0;
  s->x3 ^= K1;
  s->x4 ^= K2;

  // process associated data (skipped entirely when there is none)
  if (adlen) {
    process_data(s, (void*)0, ad, adlen, ASCON_AD);
    P(s, PB_ROUNDS);
  }
  // flip the lowest bit of x4 before the payload
  s->x4 ^= 1;

  // process plaintext/ciphertext
  process_data(s, out, in, tlen, mode);

  // finalization: the 160-bit key is XORed in starting at the top of x1
  s->x1 ^= (K0 << 32) | (K1 >> 32);
  s->x2 ^= (K1 << 32) | (K2 >> 32);
  s->x3 ^= K2 << 32;
  P(s, PA_ROUNDS);
  s->x3 ^= K1;
  s->x4 ^= K2;
}
// Shared declarations for the AEAD core: mode selectors, rate, round counts,
// the initialization vector and the two core entry points.
#ifndef CORE_H_
#define CORE_H_
#include "api.h"
#include "endian.h"
#include "permutations.h"
// Values accepted by the `mode` argument of process_data() / ascon_core().
#define ASCON_AD 0
#define ASCON_ENC 1
#define ASCON_DEC 2
// Number of bytes absorbed per block (64 bits).
#define RATE (64 / 8)
// Round counts handed to P().
#define PA_ROUNDS 12
#define PB_ROUNDS 6
// 64-bit initialization vector: key size in bits in byte 7 (most significant),
// rate in bits in byte 6, PA_ROUNDS in byte 5, PB_ROUNDS in byte 4; the low
// 32 bits are left zero.
#define IV \
((u64)(8 * (CRYPTO_KEYBYTES)) << 56 | (u64)(8 * (RATE)) << 48 | \
(u64)(PA_ROUNDS) << 40 | (u64)(PB_ROUNDS) << 32)
// Absorbs `len` bytes of `in` into the state; writes to `out` unless
// mode == ASCON_AD.
void process_data(state* s, unsigned char* out, const unsigned char* in,
unsigned long long len, u8 mode);
// Full initialization / AD / data / finalization pass; leaves the tag in
// s->x3 and s->x4.
void ascon_core(state* s, unsigned char* out, const unsigned char* in,
unsigned long long tlen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k, u8 mode);
#endif // CORE_H_
#include "core.h"
int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen,
unsigned char* nsec, const unsigned char* c,
unsigned long long clen, const unsigned char* ad,
unsigned long long adlen, const unsigned char* npub,
const unsigned char* k) {
if (clen < CRYPTO_ABYTES) {
*mlen = 0;
return -1;
}
state s;
(void)nsec;
// set plaintext size
*mlen = clen - CRYPTO_ABYTES;
ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DEC);
// verify tag (should be constant time, check compiler output)
if (((s.x3 ^ U64BIG(*(u64*)(c + *mlen))) |
(s.x4 ^ U64BIG(*(u64*)(c + *mlen + 8)))) != 0) {
*mlen = 0;
return -1;
}
return 0;
}
#include "core.h"
/*
 * Encrypt `mlen` bytes of `m` into `c` and append the 16-byte tag.
 * *clen receives mlen + CRYPTO_ABYTES. Always returns 0. `nsec` is unused.
 */
int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen,
                        const unsigned char* m, unsigned long long mlen,
                        const unsigned char* ad, unsigned long long adlen,
                        const unsigned char* nsec, const unsigned char* npub,
                        const unsigned char* k) {
  state s;
  unsigned char* const tag = c + mlen; /* tag goes right after the ciphertext */

  (void)nsec;

  *clen = mlen + CRYPTO_ABYTES;
  ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENC);

  /* tag = x3 || x4, stored big-endian */
  *(u64*)tag = U64BIG(s.x3);
  *(u64*)(tag + 8) = U64BIG(s.x4);
  return 0;
}
// Byte-order helpers: UxxBIG(x) converts between host order and big-endian
// (the conversion is its own inverse). Selection relies on the compiler's
// __BYTE_ORDER__ macro; MSVC is treated as little endian.
#ifndef ENDIAN_H_
#define ENDIAN_H_
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
// macros for big endian machines
// Host order already is big-endian: identity.
#define U64BIG(x) (x)
#define U32BIG(x) (x)
#define U16BIG(x) (x)
#elif defined(_MSC_VER) || \
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
// macros for little endian machines
// Plain byte swaps. NOTE: each macro expands its argument several times, so
// the argument must be free of side effects.
#define U64BIG(x) \
((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) | \
(((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) | \
(((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) | \
(((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56))
#define U32BIG(x) \
((((x)&0x000000FF) << 24) | (((x)&0x0000FF00) << 8) | \
(((x)&0x00FF0000) >> 8) | (((x)&0xFF000000) >> 24))
#define U16BIG(x) ((((x)&0x00FF) << 8) | (((x)&0xFF00) >> 8))
#else
// Unknown byte order: refuse to build rather than guess.
#error "ascon byte order macros not defined in endian.h"
#endif
#endif // ENDIAN_H_
#include "permutations.h"
/*
 * Apply the permutation to *p.
 *
 * The round constant starts at START_CONSTANT(rounds) and drops by 0x0f each
 * round; iteration stops once it is no longer above 0x4a (i.e. the last
 * constant used is 0x4b). The state is copied into a local named `s` because
 * the ROUND macro refers to that name directly.
 */
void P(state *p, u8 rounds) {
  state s = *p;
  u8 rc = START_CONSTANT(rounds);

  while (rc > 0x4a) {
    ROUND(rc);
    rc -= 0x0f;
  }

  *p = s;
}
// Permutation state type, byte/rotate helpers and the single-round macro.
#ifndef PERMUTATIONS_H_
#define PERMUTATIONS_H_
typedef unsigned char u8;
typedef unsigned long long u64;
// Five 64-bit lanes.
typedef struct {
u64 x0, x1, x2, x3, x4;
} state;
// Extract / insert byte n of a 64-bit word, where n = 0 is the most
// significant byte and n = 7 the least significant.
#define EXT_BYTE64(x, n) ((u8)((u64)(x) >> (8 * (7 - (n)))))
#define INS_BYTE64(x, n) ((u64)(x) << (8 * (7 - (n))))
// Rotate right by n bits. n must be in 1..63 (n == 0 would shift by 64).
#define ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n))))
// First round constant for an x-round permutation: 0xf0 for x = 12, 0x96 for
// x = 6. P() then steps it down by 0x0f per round.
#define START_CONSTANT(x) (((0xf - (12 - (x))) << 4) | (12 - (x)))
// One permutation round on a local `state s` that must be in scope at the
// expansion site. C is the round constant, XORed into x2.
// Steps, in order: constant addition and pre-mixing XORs; the nonlinear layer
// x_i ^= (~x_{i+1}) & x_{i+2} computed from the snapshot `t`; post-mixing
// XORs and the complement of x2; and per-lane diffusion
// x ^= ROTR(x, a) ^ ROTR(x, b) with (a, b) = (19, 28) for x0, (61, 39) for x1,
// (1, 6) for x2, (10, 17) for x3 and (7, 41) for x4. The second rotation of
// each lane is applied incrementally (e.g. 28 - 19) to the already rotated
// value, and the lanes are interleaved, so statement order matters.
#define ROUND(C) \
do { \
state t; \
s.x2 ^= C; \
s.x0 ^= s.x4; \
s.x4 ^= s.x3; \
s.x2 ^= s.x1; \
t.x0 = s.x0; \
t.x4 = s.x4; \
t.x3 = s.x3; \
t.x1 = s.x1; \
t.x2 = s.x2; \
s.x0 = t.x0 ^ ((~t.x1) & t.x2); \
s.x2 = t.x2 ^ ((~t.x3) & t.x4); \
s.x4 = t.x4 ^ ((~t.x0) & t.x1); \
s.x1 = t.x1 ^ ((~t.x2) & t.x3); \
s.x3 = t.x3 ^ ((~t.x4) & t.x0); \
s.x1 ^= s.x0; \
t.x1 = s.x1; \
s.x1 = ROTR64(s.x1, 39); \
s.x3 ^= s.x2; \
t.x2 = s.x2; \
s.x2 = ROTR64(s.x2, 1); \
t.x4 = s.x4; \
t.x2 ^= s.x2; \
s.x2 = ROTR64(s.x2, 6 - 1); \
t.x3 = s.x3; \
t.x1 ^= s.x1; \
s.x3 = ROTR64(s.x3, 10); \
s.x0 ^= s.x4; \
s.x4 = ROTR64(s.x4, 7); \
t.x3 ^= s.x3; \
s.x2 ^= t.x2; \
s.x1 = ROTR64(s.x1, 61 - 39); \
t.x0 = s.x0; \
s.x2 = ~s.x2; \
s.x3 = ROTR64(s.x3, 17 - 10); \
t.x4 ^= s.x4; \
s.x4 = ROTR64(s.x4, 41 - 7); \
s.x3 ^= t.x3; \
s.x1 ^= t.x1; \
s.x0 = ROTR64(s.x0, 19); \
s.x4 ^= t.x4; \
t.x0 ^= s.x0; \
s.x0 = ROTR64(s.x0, 28 - 19); \
s.x0 ^= t.x0; \
} while (0)
// Applies the permutation to *p; see START_CONSTANT for how `rounds` selects
// the first round constant.
void P(state *p, u8 rounds);
#endif // PERMUTATIONS_H_
#define CRYPTO_KEYBYTES 16 //
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
#include"crypto_aead.h"
#include"api.h"
#include <string.h>
#define U32BIG(x) (x)
#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0])))
#define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n))))
// Bit-sliced S-box: reads the four words a, b, c, d and writes the four words
// e, f, g, h. Uses the temporaries t1, t2, t3, t5, t6, t8, t9, t11, which
// must be declared (as u32) in the scope where the macro is expanded.
// Arguments are not parenthesized and are evaluated more than once, so pass
// simple lvalues only.
#define sbox(a, b, c, d, e, f, g, h) \
{ \
t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; e = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \
}
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned long long u64;
// Convert 8 bytes at `in` into the two-word packed state format at out[0..1].
// The two 32-bit words are loaded through a u32* cast (so `in` is assumed to
// be suitably aligned — TODO confirm for caller-supplied buffers), each is run
// through four delta-swap steps (shifts 1, 2, 4, 8), and then the high halves
// of both words are combined into out[0] and the low halves into out[1].
// NOTE(review): tracing single bits suggests each word ends up with its
// odd-indexed bits in the high half and even-indexed bits in the low half
// (bit interleaving) — confirm against the specification.
// Clobbers the caller's temporaries t1, t2, t3, t5.
#define packFormat(out,in) {\
t1 = U32BIG(((u32*)in)[0]); \
t2 = U32BIG(((u32*)in)[1]); \
t3 = (t1 ^ (t1 >> 1)) & 0x22222222, t1 ^= t3 ^ (t3 << 1); \
t3 = (t1 ^ (t1 >> 2)) & 0x0C0C0C0C, t1 ^= t3 ^ (t3 << 2); \
t3 = (t1 ^ (t1 >> 4)) & 0x00F000F0, t1 ^= t3 ^ (t3 << 4); \
t3 = (t1 ^ (t1 >> 8)) & 0x0000FF00, t1 ^= t3 ^ (t3 << 8); \
t5 = (t2 ^ (t2 >> 1)) & 0x22222222, t2 ^= t5 ^ (t5 << 1); \
t5 = (t2 ^ (t2 >> 2)) & 0x0C0C0C0C, t2 ^= t5 ^ (t5 << 2); \
t5 = (t2 ^ (t2 >> 4)) & 0x00F000F0, t2 ^= t5 ^ (t5 << 4); \
t5 = (t2 ^ (t2 >> 8)) & 0x0000FF00, t2 ^= t5 ^ (t5 << 8); \
out[0] = (t2 & 0xFFFF0000) | (t1 >> 16); \
out[1] = (t2 << 16) | (t1 & 0x0000FFFF); \
}
// Inverse of packFormat: rebuild 8 bytes at `out` from the packed words
// in[0..1]. The halves are regrouped into t2/t1 and the same delta-swap steps
// are applied in the opposite order (shifts 8, 4, 2, 1).
// The result is stored with a single 64-bit write through a u64* cast, so
// `out` is assumed to be suitably aligned for that store — TODO confirm for
// caller-supplied buffers. Clobbers the caller's temporaries t1, t2, t3, t5.
#define unpackFormat(out, in) {\
t2 = (in[0] & 0xFFFF0000) | (in[1] >> 16); \
t1 = (in[1] & 0x0000FFFF) | (in[0] << 16); \
t3 = (t1 ^ (t1 >> 8)) & 0x0000FF00, t1 ^= t3 ^ (t3 << 8); \
t3 = (t1 ^ (t1 >> 4)) & 0x00F000F0, t1 ^= t3 ^ (t3 << 4); \
t3 = (t1 ^ (t1 >> 2)) & 0x0C0C0C0C, t1 ^= t3 ^ (t3 << 2); \
t3 = (t1 ^ (t1 >> 1)) & 0x22222222, t1 ^= t3 ^ (t3 << 1); \
t5 = (t2 ^ (t2 >> 8)) & 0x0000FF00, t2 ^= t5 ^ (t5 << 8); \
t5 = (t2 ^ (t2 >> 4)) & 0x00F000F0, t2 ^= t5 ^ (t5 << 4); \
t5 = (t2 ^ (t2 >> 2)) & 0x0C0C0C0C, t2 ^= t5 ^ (t5 << 2); \
t5 = (t2 ^ (t2 >> 1)) & 0x22222222, t2 ^= t5 ^ (t5 << 1); \
*((u64*)out) = ((u64)t2 << 32 | t1); \
}
// Apply the packFormat bit permutation to a single 32-bit word: load one u32
// from `in`, run the four delta-swap steps (shifts 1, 2, 4, 8) and store the
// result through the pointer `out`.
// Clobbers the caller's temporaries t1 and t2 (both u32).
// The load no longer goes through a stray `t1, t2 = ...` comma expression,
// which evaluated an uninitialized t1 for no effect; `in` and `out` are now
// parenthesized so pointer expressions can be passed safely.
#define getU32Format(out, in) {\
t2 = U32BIG(((u32*)(in))[0]); \
t1 = (t2 ^ (t2 >> 1)) & 0x22222222, t2 ^= t1 ^ (t1 << 1); \
t1 = (t2 ^ (t2 >> 2)) & 0x0C0C0C0C, t2 ^= t1 ^ (t1 << 2); \
t1 = (t2 ^ (t2 >> 4)) & 0x00F000F0, t2 ^= t1 ^ (t1 << 4); \
t1 = (t2 ^ (t2 >> 8)) & 0x0000FF00, t2 ^= t1 ^ (t1 << 8); \
*(out) = t2; \
}
// One round of the 256-bit permutation on the caller's `u32 s[8]`, using
// `u32 s_temp[8]` as scratch (both must be in scope, together with the sbox
// temporaries).
//  1. Round constant: the high nibble of constant6Format[lunNum] is XORed
//     into s[0], the low nibble into s[1].
//  2. sbox is applied to the even words (s[0], s[2], s[4], s[6]) and to the
//     odd words (s[1], s[3], s[5], s[7]).
//  3. Word pair (s[0], s[1]) is kept; pair (s[2], s[3]) takes s_temp[3] and
//     s_temp[2] rotated left by 1 (words swapped); pair (s[4], s[5]) is
//     rotated left by 4 each; pair (s[6], s[7]) takes s_temp[7] rotated by 12
//     and s_temp[6] rotated by 13 (words swapped).
#define ROUND256( constant6Format,lunNum) {\
s[0] ^= constant6Format[lunNum]>> 4;\
s[1] ^= constant6Format[lunNum]& 0x0f;\
sbox(s[0], s[2], s[4], s[6], s_temp[0], s_temp[2], s_temp[4], s_temp[6]);\
sbox(s[1], s[3], s[5], s[7], s_temp[1], s_temp[3], s_temp[5], s_temp[7]);\
s[0] = s_temp[0];\
s[1] = s_temp[1];\
s[2] = s_temp[3];\
s[3] = LOTR32(s_temp[2], 1);\
s[4] = LOTR32(s_temp[4], 4);\
s[5] = LOTR32(s_temp[5], 4);\
s[6] = LOTR32(s_temp[7], 12);\
s[7] = LOTR32(s_temp[6], 13);\
}
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
);
int crypto_aead_decrypt(
unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
);
#include"auxFormat.h"
#define RATE (64 / 8)
#define PR0_ROUNDS 52
#define PR_ROUNDS 28
#define PRF_ROUNDS 32
/*
 * Round constants for ROUND256, one byte per round (63 entries).
 * Each byte is already split for the packed state format: the high nibble is
 * XORed into s[0] and the low nibble into s[1]. Every nibble is in 0..7.
 * (constant6_aead_128v1)
 */
unsigned char constant6Format[63] = {
    /*  0.. 7 */ 0x01, 0x10, 0x02, 0x20, 0x04, 0x41, 0x11, 0x12,
    /*  8..15 */ 0x22, 0x24, 0x45, 0x50, 0x03, 0x30, 0x06, 0x61,
    /* 16..23 */ 0x15, 0x53, 0x33, 0x36, 0x67, 0x74, 0x46, 0x60,
    /* 24..31 */ 0x05, 0x51, 0x13, 0x32, 0x26, 0x65, 0x54, 0x42,
    /* 32..39 */ 0x21, 0x14, 0x43, 0x31, 0x16, 0x63, 0x35, 0x57,
    /* 40..47 */ 0x72, 0x27, 0x75, 0x56, 0x62, 0x25, 0x55, 0x52,
    /* 48..55 */ 0x23, 0x34, 0x47, 0x70, 0x07, 0x71, 0x17, 0x73,
    /* 56..62 */ 0x37, 0x77, 0x76, 0x66, 0x64, 0x44, 0x40,
};
// AEAD encryption over a 256-bit state held as eight packed u32 words.
// Writes mlen ciphertext bytes followed by a 16-byte tag to `c`, sets *clen to
// mlen + CRYPTO_ABYTES and always returns 0. `nsec` is not used.
// The locals s, s_temp and t1..t11 are referenced by name from the
// packFormat / unpackFormat / ROUND256 / sbox macros.
int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec, const unsigned char *npub,
const unsigned char *k) {
unsigned int i, j;
u32 s[8] = { 0 };
u32 dataFormat[2] = { 0 };
u8 tempData[8];
u32 s_temp[8] = { 0 };
u32 t1, t2, t3, t5, t6, t8, t9, t11;
*clen = mlen + CRYPTO_ABYTES;
//initialization
// state = pack(nonce[0..15]) || pack(key[0..15]), then PR0_ROUNDS rounds
packFormat(s, npub);
packFormat((s + 2), (npub + 8));
packFormat((s + 4), k);
packFormat((s + 6), (k + 8));
for (i = 0; i < PR0_ROUNDS; i++) {
ROUND256(constant6Format,i);
}
// process associated data
// (skipped entirely, including padding, when adlen == 0)
if (adlen) {
//rlen = adlen;
// full RATE-byte blocks: XOR into s[0..1], then PR_ROUNDS rounds
while (adlen >= RATE) {
packFormat(dataFormat, ad);
s[0] ^= dataFormat[0];
s[1] ^= dataFormat[1];
for (i = 0; i < PR_ROUNDS; i++) {
ROUND256(constant6Format, i);
}
adlen -= RATE;
ad += RATE;
}
// last block: remaining 0..RATE-1 bytes, a 0x01 byte, zero fill
memset(tempData, 0, sizeof(tempData));
memcpy(tempData, ad, adlen * sizeof(unsigned char));
tempData[adlen] = 0x01;
packFormat(dataFormat, tempData);
s[0] ^= dataFormat[0];
s[1] ^= dataFormat[1];
for (i = 0; i < PR_ROUNDS; i++) {
ROUND256(constant6Format, i);
}
}
// single-bit separator applied after the associated-data phase
s[6] ^= 0x80000000;
if (mlen) {
// full blocks: XOR plaintext into s[0..1]; the updated words are the
// ciphertext block, written straight into c by unpackFormat
while (mlen >= RATE) {
packFormat(dataFormat, m);
s[0] ^= dataFormat[0];
s[1] ^= dataFormat[1];
unpackFormat(c, s);
for (i = 0; i < PR_ROUNDS; i++) {
ROUND256(constant6Format, i);
}
mlen -= RATE;
m += RATE;
c += RATE;
}
// last block: padded like the associated data; only the mlen real
// ciphertext bytes are copied out, and no rounds follow here
memset(tempData, 0, sizeof(tempData));
memcpy(tempData, m, mlen * sizeof(unsigned char));
tempData[mlen]= 0x01;
packFormat(dataFormat, tempData);
s[0] ^= dataFormat[0];
s[1] ^= dataFormat[1];
unpackFormat(tempData, s);
memcpy(c, tempData, mlen * sizeof(unsigned char));
c +=mlen;
}
// finalization
for (i = 0; i < PRF_ROUNDS; i++) {
ROUND256(constant6Format, i);
}
// return tag
// tag = unpack(s[0..1]) || unpack(s[2..3]); staged through tempData and
// memcpy rather than unpacked directly into c
unpackFormat(tempData, s);
memcpy(c, tempData, sizeof(tempData));
unpackFormat(tempData,(s + 2));
memcpy(c+8, tempData, sizeof(tempData));
// unpackFormat((c), s);
// unpackFormat((c+8),(s + 2));
return 0;
}
// AEAD decryption over a 256-bit state held as eight packed u32 words.
//
// `c` holds clen bytes: the ciphertext followed by a CRYPTO_ABYTES tag.
// The plaintext is written to `m` and *mlen is set to clen - CRYPTO_ABYTES.
//
// Returns 0 when the tag matches and -1 otherwise. When clen is shorter than
// a tag, *mlen is set to 0 and -1 is returned before anything is processed
// (previously *mlen received a wrapped-around length in that case).
// NOTE(review): the plaintext has already been written to `m` when a tag
// mismatch is detected; callers must discard it when -1 is returned.
// `nsec` is not used.
//
// The locals s, s_temp and t1..t11 are referenced by name from the
// packFormat / unpackFormat / ROUND256 / sbox macros.
int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen,
    unsigned char *nsec, const unsigned char *c, unsigned long long clen,
    const unsigned char *ad, unsigned long long adlen,
    const unsigned char *npub, const unsigned char *k) {
    u8 i;
    u32 s[8] = { 0 };
    u32 dataFormat[4] = { 0 };
    u32 dataFormat_1[2] = { 0 };
    u8 tempU8[32] = { 0 };
    u8 tempData[8];
    u32 s_temp[8] = { 0 };
    u32 t1, t2, t3, t5, t6, t8, t9, t11;
    u32 tagDiff;

    (void)nsec;

    // reject inputs that cannot even contain a tag before computing a length
    if (clen < CRYPTO_ABYTES) {
        *mlen = 0;
        return -1;
    }
    *mlen = clen - CRYPTO_ABYTES;

    // initialization: state = pack(nonce) || pack(key), then PR0_ROUNDS rounds
    packFormat(s, npub);
    packFormat((s + 2), (npub + 8));
    packFormat((s + 4), k);
    packFormat((s + 6), (k + 8));
    for (i = 0; i < PR0_ROUNDS; i++) {
        ROUND256(constant6Format, i);
    }

    // process associated data (skipped entirely when adlen == 0)
    if (adlen) {
        while (adlen >= RATE) {
            packFormat(dataFormat, ad);
            s[0] ^= dataFormat[0];
            s[1] ^= dataFormat[1];
            for (i = 0; i < PR_ROUNDS; i++) {
                ROUND256(constant6Format, i);
            }
            adlen -= RATE;
            ad += RATE;
        }
        // last block: remaining bytes, a 0x01 byte, zero fill
        memset(tempData, 0, sizeof(tempData));
        memcpy(tempData, ad, adlen * sizeof(unsigned char));
        tempData[adlen] = 0x01;
        packFormat(dataFormat, tempData);
        s[0] ^= dataFormat[0];
        s[1] ^= dataFormat[1];
        for (i = 0; i < PR_ROUNDS; i++) {
            ROUND256(constant6Format, i);
        }
    }
    // single-bit separator applied after the associated-data phase
    s[6] ^= 0x80000000;

    // process ciphertext: strip the tag length (CRYPTO_ABYTES, not the key
    // length -- they happen to be equal here)
    clen = clen - CRYPTO_ABYTES;
    if (clen) {
        while (clen >= RATE) {
            // plaintext = state ^ ciphertext; the ciphertext block then
            // replaces the rate part of the state
            packFormat(dataFormat, c);
            dataFormat_1[0] = s[0] ^ dataFormat[0];
            dataFormat_1[1] = s[1] ^ dataFormat[1];
            unpackFormat(m, dataFormat_1);
            s[0] = dataFormat[0];
            s[1] = dataFormat[1];
            for (i = 0; i < PR_ROUNDS; i++) {
                ROUND256(constant6Format, i);
            }
            clen -= RATE;
            m += RATE;
            c += RATE;
        }
        // last block, handled byte-wise on the unpacked rate part
        unpackFormat(tempU8, s);
        for (i = 0; i < clen; ++i, ++m, ++c) {
            *m = tempU8[i] ^ *c;
            tempU8[i] = *c;
        }
        // padding byte, mirroring the encryptor's 0x01 pad
        tempU8[i] ^= 0x01;
        packFormat(s, tempU8);
    }

    // finalization
    for (i = 0; i < PRF_ROUNDS; i++) {
        ROUND256(constant6Format, i);
    }

    // verify tag: accumulate all differences with OR instead of a
    // short-circuiting comparison, so the time taken does not depend on the
    // position of the first mismatching word
    packFormat(dataFormat, c);
    packFormat((dataFormat + 2), (c + 8));
    tagDiff = (dataFormat[0] ^ s[0]) | (dataFormat[1] ^ s[1]) |
              (dataFormat[2] ^ s[2]) | (dataFormat[3] ^ s[3]);
    if (tagDiff != 0) {
        return -1;
    }
    return 0;
}
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
//#include<malloc.h>
#include"crypto_aead.h"
#include"api.h"
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#define U32BIG(x) (x)
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned long long u64;
#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0])))
#define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n))))
// Gather every third bit of x -- bits 1, 4, 7, ..., 31 -- into the top 11
// bits (31..21) of x, preserving their order; all other bits end up zero.
// x must be a modifiable 32-bit unsigned lvalue; it is evaluated several times.
// Wrapped in do { } while (0) so that `puckU32ToThree(v);` is a single
// statement and can be used safely in an unbraced if/else.
#define puckU32ToThree(x) do {\
(x) &= 0x92492492;\
(x) = ((x) | ((x) << 2)) & 0xc30c30c3;\
(x) = ((x) | ((x) << 4)) & 0xf00f00f0;\
(x) = ((x) | ((x) << 8)) & 0xff0000ff;\
(x) = ((x) | ((x) << 16)) & 0xfff00000;\
} while (0)
// Inverse of puckU32ToThree: spread the top 11 bits (31..21) of x back out to
// bit positions 31, 28, ..., 4, 1; bit 20 and below are discarded.
// x must be a modifiable 32-bit unsigned lvalue; it is evaluated several times.
// Wrapped in do { } while (0) so that `unpuckU32ToThree(v);` is a single
// statement and can be used safely in an unbraced if/else.
#define unpuckU32ToThree(x) do {\
(x) &= 0xfff00000;\
(x) = ((x) | ((x) >> 16)) & 0xff0000ff;\
(x) = ((x) | ((x) >> 8)) & 0xf00f00f0;\
(x) = ((x) | ((x) >> 4)) & 0xc30c30c3;\
(x) = ((x) | ((x) >> 2)) & 0x92492492;\
} while (0)
// Pack one 32-bit word read from `in` into the low bits of the three packed
// words out[0..2]. The word is shifted left by 2 first; the two bits that the
// shift would drop are taken from in[3] (bits 0x80 and 0x40) and placed at
// bit 10 of out[1] and out[2]. Each of the three shifted copies is reduced by
// puckU32ToThree and moved down by 22 bits.
// NOTE(review): reading the top bits from in[3] matches the u32 load only on
// a little-endian host, where U32BIG is the identity — confirm.
// Uses the caller's t2, t2_64, t2_65 and temp2[3].
#define packU32FormatToThreePacket( out, in) {\
t2 = U32BIG(((u32*)in)[0]); \
t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6; \
t2 = t2 << 2; \
temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; \
puckU32ToThree(temp2[0]); \
puckU32ToThree(temp2[1]); \
puckU32ToThree(temp2[2]); \
out[0] = (temp2[0] >> 22); \
out[1] = (((u32)t2_64) << 10) | (temp2[1] >> 22); \
out[2] =(((u32)t2_65) << 10) | (temp2[2] >> 22); \
}
// Pack 12 bytes at `in` into the three packed state words out[0..2].
// The bytes are loaded as three u32 words (t2 = word 0, t1 = word 1,
// t9 = word 2). t1 is shifted left by 1 and t2 by 2; the bits those shifts
// drop are captured beforehand from in[7] and in[3] (t1_32, t2_64, t2_65).
// For each word, three copies shifted by 0, 1 and 2 are reduced with
// puckU32ToThree (every third bit gathered into the top bits), and the
// results are placed side by side in out[j]: t9's part at the top, t1's part
// 11 bits lower, t2's part 22 bits lower, with the captured bits filling
// bit 10 of out[1], and bits 21 and 10 of out[2].
// NOTE(review): capturing the top bits from in[7] / in[3] matches the u32
// loads only on a little-endian host (U32BIG is the identity here) — confirm.
// `in` is read through a u32* cast, so it is assumed to be suitably aligned.
// Uses the caller's t1, t2, t9, t1_32, t2_64, t2_65 and temp0/temp1/temp2.
#define packU96FormatToThreePacket(out, in) {\
t9 = U32BIG(((u32*)in)[2]); \
t1 = U32BIG(((u32*)in)[1]); \
t2 = U32BIG(((u32*)in)[0]); \
t1_32 = (in[7] & 0x80) >> 7, t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6; \
t1 = t1 << 1; \
t2 = t2 << 2; \
temp0[0] = t9; temp0[1] = t9 << 1; temp0[2] = t9 << 2; \
puckU32ToThree(temp0[0]); \
puckU32ToThree(temp0[1]); \
puckU32ToThree(temp0[2]); \
temp1[0] = t1; temp1[1] = t1 << 1; temp1[2] = t1 << 2; \
puckU32ToThree(temp1[0]); \
puckU32ToThree(temp1[1]); \
puckU32ToThree(temp1[2]); \
temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; \
puckU32ToThree(temp2[0]); \
puckU32ToThree(temp2[1]); \
puckU32ToThree(temp2[2]); \
out[0] = (temp0[0]) | (temp1[0] >> 11) | (temp2[0] >> 22); \
out[1] = (temp0[1]) | (temp1[1] >> 11) | (((u32)t2_64) << 10) | (temp2[1] >> 22); \
out[2] = (temp0[2]) | (((u32)t1_32) << 21) | (temp1[2] >> 11) | (((u32)t2_65) << 10) | (temp2[2] >> 22); \
}
// Inverse of packU32FormatToThreePacket: rebuild one 32-bit word from the low
// bits of the packed words in[0..2] and store it at `out` through a u32* cast.
// The low 10 bits of each packed word are expanded with unpuckU32ToThree and
// merged; bit 10 of in[1] and in[2] supplies result bits 31 and 30.
// Uses the caller's t2, t2_64, t2_65 and temp2[3].
#define unpackU32FormatToThreePacket(out, in) {\
temp2[0] = (in[0] & 0x000003ff) << 22; \
t2_64 = ((in[1] & 0x00000400) << 21); \
temp2[1] = (in[1] & 0x000003ff) << 22; \
t2_65 = ((in[2] & 0x00000400) << 20); \
temp2[2] = (in[2] & 0x000003ff) << 22; \
unpuckU32ToThree(temp2[0]); \
unpuckU32ToThree(temp2[1]); \
unpuckU32ToThree(temp2[2]); \
t2 = t2_65 | t2_64 | ((temp2[0] | temp2[1] >> 1 | temp2[2] >> 2) >> 2); \
*(u32*)(out) = U32BIG(t2); \
}
// Inverse of packU96FormatToThreePacket: rebuild 12 bytes at `out` from the
// packed words in[0..2].
// Each packed word is split into its three fields (top field for t9, middle
// for t1, low 10 bits for t2); the single bits stored at bit 10 of in[1],
// and bits 21 and 10 of in[2], are extracted separately (t2_64, t1_32,
// t2_65), which is why the field masks differ slightly between the words.
// Every field is expanded with unpuckU32ToThree, the three expansions of a
// word are merged with shifts of 0, 1 and 2, and the pre-shift applied when
// packing (1 for t1, 2 for t2) is undone.
// The result is stored as three u32 words through u32* casts, so `out` is
// assumed to be suitably aligned — TODO confirm for caller-supplied buffers.
// Uses the caller's t1, t2, t9, t1_32, t2_64, t2_65 and temp0/temp1/temp2.
#define unpackU96FormatToThreePacket( out, in) {\
temp0[0] = in[0] & 0xffe00000; \
temp1[0] = (in[0] & 0x001ffc00) << 11; \
temp2[0] = (in[0] & 0x000003ff) << 22; \
temp0[1] = in[1] & 0xffe00000; \
temp1[1] = (in[1] & 0x001ff800) << 11; \
t2_64 = ((in[1] & 0x00000400) << 21); \
temp2[1] = (in[1] & 0x000003ff) << 22; \
temp0[2] = in[2] & 0xffc00000; \
t1_32 = ((in[2] & 0x00200000) << 10); \
temp1[2] = (in[2] & 0x001ff800) << 11; \
t2_65 = ((in[2] & 0x00000400) << 20); \
temp2[2] = (in[2] & 0x000003ff) << 22; \
unpuckU32ToThree(temp0[0]); \
unpuckU32ToThree(temp0[1]); \
unpuckU32ToThree(temp0[2]); \
t9 = temp0[0] | temp0[1] >> 1 | temp0[2] >> 2; \
unpuckU32ToThree(temp1[0]); \
unpuckU32ToThree(temp1[1]); \
unpuckU32ToThree(temp1[2]); \
t1 = t1_32 | ((temp1[0] | temp1[1] >> 1 | temp1[2] >> 2) >> 1); \
unpuckU32ToThree(temp2[0]); \
unpuckU32ToThree(temp2[1]); \
unpuckU32ToThree(temp2[2]); \
t2 = t2_65 | t2_64 | ((temp2[0] | temp2[1] >> 1 | temp2[2] >> 2) >> 2); \
*(u32*)(out) = U32BIG(t2); \
*(u32*)(out + 4) = U32BIG(t1); \
*(u32*)(out + 8) = U32BIG(t9); \
}
#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0])))
// Bit-sliced S-box: reads the four words a, b, c, d and writes the four words
// e, f, g, h. Uses the temporaries t1, t2, t3, t5, t6, t8, t9, t11, which
// must be declared (as u32) in the scope where the macro is expanded.
// Arguments are not parenthesized and are evaluated more than once, so pass
// simple lvalues only.
#define sbox(a, b, c, d, e, f, g, h) \
{ \
t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; e = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \
}
// Row transform on three packed words: (t3, t4, t5) = (t1, t2, t0 <<< 1).
// Inputs are t0..t2, outputs t3..t5 (the parameter names shadow the sbox
// temporaries only inside this macro).
// NOTE(review): the name suggests a 1-bit left rotation of a 96-bit row in
// the three-way interleaved layout — confirm against the specification.
#define U96_BIT_LOTR32_1(t0,t1,t2,t3,t4,t5){\
t3= t1;\
t4 = t2;\
t5 = LOTR32(t0, 1); \
}
// Row transform on three packed words:
// (t3, t4, t5) = (t2 <<< 2, t0 <<< 3, t1 <<< 3).
// NOTE(review): the name suggests an 8-bit left rotation of a 96-bit row in
// the three-way interleaved layout — confirm against the specification.
#define U96_BIT_LOTR32_8(t0,t1,t2,t3,t4,t5){\
t3= LOTR32(t2, 2);\
t4 =LOTR32(t0, 3);\
t5 = LOTR32(t1, 3); \
}
// Row transform on three packed words:
// (t3, t4, t5) = (t1 <<< 18, t2 <<< 18, t0 <<< 19).
// NOTE(review): the name suggests a 55-bit left rotation of a 96-bit row in
// the three-way interleaved layout — confirm against the specification.
#define U96_BIT_LOTR32_55(t0,t1,t2,t3,t4,t5){\
t3= LOTR32(t1, 18); \
t4 = LOTR32(t2, 18);\
t5 = LOTR32(t0, 19); \
}
/*
Layout of the 384-bit state s[0..11]: four rows of three 32-bit words each.
s0 s1 s2
s3 s4 s5
s6 s7 s8
s9 s10 s11
*/
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
);
int crypto_aead_decrypt(
unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
);
#include"auxFormat.h"
#define aead_RATE (192 / 8)
#define PR0_ROUNDS 76
#define PR_ROUNDS 28
#define PRF_ROUNDS 32
// Round constants for ROUND384, one byte per round (127 entries).
// ROUND384 splits each byte into bits 7..6, bits 5..3 and bits 2..0, which it
// XORs into s[0], s[1] and s[2] respectively.
unsigned char constant7Format[127] = {
/*constant7Format[127]:*/
0x01,0x08,0x40,0x02,0x10,0x80,0x05,0x09,0x48,0x42,0x12,0x90,
0x85,0x0c,0x41,0x0a,0x50,0x82,0x15,0x89,0x4d,0x4b,0x5a,0xd2,
0x97,0x9c,0xc4,0x06,0x11,0x88,0x45,0x0b,0x58,0xc2,0x17,0x99,
0xcd,0x4e,0x53,0x9a,0xd5,0x8e,0x54,0x83,0x1d,0xc9,0x4f,0x5b,
0xda,0xd7,0x9e,0xd4,0x86,0x14,0x81,0x0d,0x49,0x4a,0x52,0x92,
0x95,0x8c,0x44,0x03,0x18,0xc0,0x07,0x19,0xc8,0x47,0x1b,0xd8,
0xc7,0x1e,0xd1,0x8f,0x5c,0xc3,0x1f,0xd9,0xcf,0x5e,0xd3,0x9f,
0xdc,0xc6,0x16,0x91,0x8d,0x4c,0x43,0x1a,0xd0,0x87,0x1c,0xc1,
0x0f,0x59,0xca,0x57,0x9b,0xdd,0xce,0x56,0x93,0x9d,0xcc,0x46,
0x13,0x98,0xc5,0x0e,0x51,0x8a,0x55,0x8b,0x5d,0xcb,0x5f,0xdb,
0xdf,0xde,0xd6,0x96,0x94,0x84,0x04, };
// One round of the 384-bit permutation on the caller's `u32 s[12]`, using
// `u32 s_temp[12]` as scratch (both must be in scope, together with the sbox
// temporaries).
//  1. Round constant: constant7Format[lunNum] is split into bits 7..6,
//     5..3 and 2..0, XORed into s[0], s[1] and s[2].
//  2. sbox is applied to the three word columns (s[j], s[3+j], s[6+j],
//     s[9+j]) for j = 0, 1, 2.
//  3. Row 0 (s[0..2]) is copied back unchanged; rows 1, 2 and 3 go through
//     U96_BIT_LOTR32_1, U96_BIT_LOTR32_8 and U96_BIT_LOTR32_55.
#define ROUND384(lunNum) {\
s[0] ^= (constant7Format[lunNum] >> 6) & 0x3;\
s[1] ^= (constant7Format[lunNum] >> 3) & 0x7;\
s[2] ^= constant7Format[lunNum] & 0x7;\
sbox(s[0], s[3], s[6], s[9] , s_temp[0], s_temp[3], s_temp[6], s_temp[9]);\
sbox(s[1], s[4], s[7], s[10], s_temp[1], s_temp[4], s_temp[7], s_temp[10]);\
sbox(s[2], s[5], s[8], s[11], s_temp[2], s_temp[5], s_temp[8], s_temp[11]);\
s[0] = s_temp[0], s[1] = s_temp[1], s[2] = s_temp[2];\
U96_BIT_LOTR32_1(s_temp[3], s_temp [4], s_temp[ 5], s[3], s[4], s[5]);\
U96_BIT_LOTR32_8(s_temp[6], s_temp [7], s_temp[ 8], s[6], s[7], s[8]);\
U96_BIT_LOTR32_55(s_temp[9], s_temp[10], s_temp[11], s[9], s[10], s[11]);\
}
// AEAD encryption over a 384-bit state (twelve packed u32 words) with a
// 24-byte rate (aead_RATE), a 16-byte nonce and a 16-byte key.
// Writes mlen ciphertext bytes followed by a 16-byte tag to `c`, sets *clen to
// mlen + CRYPTO_ABYTES and always returns 0. `nsec` is not used.
// The locals s, s_temp, t*, temp0..temp2 are referenced by name from the
// pack/unpack, ROUND384 and sbox macros.
int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec, const unsigned char *npub,
const unsigned char *k) {
u8 i;
u32 s[12] = { 0 };
u8 tempData[24] = { 0 };
u32 dataFormat[6] = { 0 };
u32 s_temp[12] = { 0 };
u32 t1, t2, t3, t5, t6, t8, t9, t11;
u32 t1_32, t2_64, t2_65;
u32 temp0[3] = { 0 };
u32 temp1[3] = { 0 };
u32 temp2[3] = { 0 };
*clen = mlen + CRYPTO_ABYTES;
// initialization
// rows 0..2 = nonce[0..15] || key[0..15] || 4 zero bytes (tempData starts
// zeroed and only 20 bytes are copied in); row 3 is zero except the top bit
// of s[9]
packU96FormatToThreePacket(s, npub);
memcpy(tempData, npub+12, sizeof(unsigned char)*4);
memcpy(tempData+4, k, sizeof(unsigned char) * 16);
packU96FormatToThreePacket((s + 3), tempData);
packU96FormatToThreePacket((s + 6), (tempData+12));
s[9] = 0x80000000;
for (i = 0; i < PR0_ROUNDS; i++) {
ROUND384(i);
}
// process associated data
// (skipped entirely, including padding, when adlen == 0)
if (adlen) {
// rlen = adlen;
// full 24-byte blocks: XOR into rows 0 and 1, then PR_ROUNDS rounds
while (adlen >= aead_RATE) {
packU96FormatToThreePacket(dataFormat, ad);
s[0] ^= dataFormat[0];
s[1] ^= dataFormat[1];
s[2] ^= dataFormat[2];
packU96FormatToThreePacket((dataFormat+3), (ad+12));
s[3] ^= dataFormat[3];
s[4] ^= dataFormat[4];
s[5] ^= dataFormat[5];
for (i = 0; i < PR_ROUNDS; i++) {
ROUND384(i);
}
adlen -= aead_RATE;
ad += aead_RATE;
}
// last block: remaining 0..23 bytes, a 0x01 byte, zero fill
memset(tempData, 0, sizeof(tempData));
memcpy(tempData, ad, adlen * sizeof(unsigned char));
tempData[adlen] = 0x01;
packU96FormatToThreePacket(dataFormat, tempData);
s[0] ^= dataFormat[0];
s[1] ^= dataFormat[1];
s[2] ^= dataFormat[2];
packU96FormatToThreePacket((dataFormat + 3), (tempData + 12));
s[3] ^= dataFormat[3];
s[4] ^= dataFormat[4];
s[5] ^= dataFormat[5];
for (i = 0; i < PR_ROUNDS; i++) {
ROUND384(i);
}
}
// single-bit separator applied after the associated-data phase
s[9] ^= 0x80000000;
if (mlen) {
//rlen = mlen;
// full blocks: XOR plaintext into rows 0 and 1; the updated rows are the
// ciphertext block, unpacked straight into c
while (mlen >= aead_RATE) {
packU96FormatToThreePacket(dataFormat, m);
s[0] ^= dataFormat[0];
s[1] ^= dataFormat[1];
s[2] ^= dataFormat[2];
packU96FormatToThreePacket((dataFormat + 3), (m + 12));
s[3] ^= dataFormat[3];
s[4] ^= dataFormat[4];
s[5] ^= dataFormat[5];
unpackU96FormatToThreePacket(c, s);
unpackU96FormatToThreePacket((c+12), (s+3));
for (i = 0; i < PR_ROUNDS; i++) {
ROUND384(i);
}
mlen -= aead_RATE;
m += aead_RATE;
c += aead_RATE;
}
// last block: padded like the associated data; only the mlen real
// ciphertext bytes are copied out, and no rounds follow here
memset(tempData, 0, sizeof(tempData));
memcpy(tempData, m, mlen * sizeof(unsigned char));
tempData[mlen]= 0x01;
packU96FormatToThreePacket(dataFormat, tempData);
s[0] ^= dataFormat[0];
s[1] ^= dataFormat[1];
s[2] ^= dataFormat[2];
packU96FormatToThreePacket((dataFormat + 3), (tempData + 12));
s[3] ^= dataFormat[3];
s[4] ^= dataFormat[4];
s[5] ^= dataFormat[5];
unpackU96FormatToThreePacket(tempData, s);
unpackU96FormatToThreePacket((tempData+12), (s+3));
memcpy(c, tempData, mlen * sizeof(unsigned char));
c += mlen;
}
// finalization
for (i = 0; i < PRF_ROUNDS; i++) {
ROUND384(i);
}
// return tag
// tag = 12 bytes unpacked from row 0, plus the first 4 bytes unpacked from
// row 1 (staged through tempData so only 4 bytes reach c)
unpackU96FormatToThreePacket(c, s);
unpackU96FormatToThreePacket(tempData, (s + 3));
memcpy(c+12, tempData, sizeof(unsigned char) * 4);
return 0;
}
int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen,
unsigned char *nsec, const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub, const unsigned char *k) {
u8 i, j;
u32 s[12] = { 0 };
u32 s_temp[12] = { 0 };
u32 dataFormat[12] = { 0 };
u32 dataFormat_1[12] = { 0 };
u8 tempData[24] = { 0 };
u8 tempU8[24] = { 0 };
u32 t1, t2, t3, t5, t6, t8, t9, t11;
u32 t1_32, t2_64, t2_65;
u32 temp0[3] = { 0 };
u32 temp1[3] = { 0 };
u32 temp2[3] = { 0 }; *mlen = clen - CRYPTO_ABYTES;
if (clen < CRYPTO_ABYTES)
return -1;
// initialization
packU96FormatToThreePacket(s, npub);
memcpy(tempData, npub + 12, sizeof(unsigned char) * 4);
memcpy(tempData + 4, k, sizeof(unsigned char) * 16);
packU96FormatToThreePacket((s + 3), tempData);
packU96FormatToThreePacket((s + 6), (tempData + 12));
s[9] = 0x80000000;
for (i = 0; i < PR0_ROUNDS; i++) {
ROUND384(i);
}
// process associated data
if (adlen) {
// rlen = adlen;
while (adlen >= aead_RATE) {
packU96FormatToThreePacket(dataFormat, ad);
s[0] ^= dataFormat[0];
s[1] ^= dataFormat[1];
s[2] ^= dataFormat[2];
packU96FormatToThreePacket((dataFormat + 3), (ad + 12));
s[3] ^= dataFormat[3];
s[4] ^= dataFormat[4];
s[5] ^= dataFormat[5];
for (i = 0; i < PR_ROUNDS; i++) {
ROUND384(i);
}
adlen -= aead_RATE;
ad += aead_RATE;
}
memset(tempData, 0, sizeof(tempData));
memcpy(tempData, ad, adlen * sizeof(unsigned char));
tempData[adlen] = 0x01;
packU96FormatToThreePacket(dataFormat, tempData);
s[0] ^= dataFormat[0];
s[1] ^= dataFormat[1];
s[2] ^= dataFormat[2];
packU96FormatToThreePacket((dataFormat + 3), (tempData + 12));
s[3] ^= dataFormat[3];
s[4] ^= dataFormat[4];
s[5] ^= dataFormat[5];
for (i = 0; i < PR_ROUNDS; i++) {
ROUND384(i);
}
}
s[9] ^= 0x80000000;
///////////
clen -= CRYPTO_ABYTES;
if (clen) {
while (clen >= aead_RATE) {
packU96FormatToThreePacket(dataFormat, c);
dataFormat_1[0] = s[0] ^ dataFormat[0];
dataFormat_1[1] = s[1] ^ dataFormat[1];
dataFormat_1[2] = s[2] ^ dataFormat[2];
packU96FormatToThreePacket((dataFormat+3), (c+12));
dataFormat_1[3] = s[3] ^ dataFormat[3];
dataFormat_1[4] = s[4] ^ dataFormat[4];
dataFormat_1[5] = s[5] ^ dataFormat[5];
unpackU96FormatToThreePacket(m, dataFormat_1);
unpackU96FormatToThreePacket((m + 12), (dataFormat_1 + 3));
s[0] = dataFormat[0];
s[1] = dataFormat[1];
s[2] = dataFormat[2];
s[3] = dataFormat[3];
s[4] = dataFormat[4];
s[5] = dataFormat[5];
for (i = 0; i < PR_ROUNDS; i++) {
ROUND384(i);
}
clen -= aead_RATE;
m += aead_RATE;
c += aead_RATE;
}
unpackU96FormatToThreePacket(tempU8, s);
unpackU96FormatToThreePacket((tempU8+12), (s+3));
for (i = 0; i < clen; ++i, ++m, ++c)
{
*m = tempU8[i] ^ *c;
tempU8[i] = *c;
}
tempU8[i] ^= 0x01;
packU96FormatToThreePacket(s, tempU8);
packU96FormatToThreePacket((s + 3), (tempU8 + 12));
}
// finalization
for (i = 0; i < PRF_ROUNDS; i++) {
ROUND384(i);
}
// return tag
unpackU96FormatToThreePacket(tempU8, s);
unpackU96FormatToThreePacket((tempU8+12), (s+3));
if (U32BIG(((u32*)tempU8)[0]) != U32BIG(((u32*)c)[0]) ||
U32BIG(((u32*)tempU8)[1]) != U32BIG(((u32*)c)[1]) ||
U32BIG(((u32*)tempU8)[2]) != U32BIG(((u32*)c)[2]) ||
U32BIG(((u32*)tempU8)[3]) != U32BIG(((u32*)c)[3]) ){
return -1;
}
return 0;
}
#define CRYPTO_KEYBYTES 24
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 24
#define CRYPTO_ABYTES 24
#define CRYPTO_NOOVERLAP 1
#include"crypto_aead.h"
#include"api.h"
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#define U32BIG(x) (x)
#define U16BIG(x) (x)
typedef unsigned char u8;
typedef unsigned short u16;
typedef unsigned int u32;
typedef unsigned long long u64;
#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0])))
#define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n))))
#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0])))
// Bit-sliced S-box: reads the four words a, b, c, d and writes the four words
// e, f, g, h. Uses the temporaries t1, t2, t3, t5, t6, t8, t9, t11, which
// must be declared (as u32) in the scope where the macro is expanded.
// Arguments are not parenthesized and are evaluated more than once, so pass
// simple lvalues only.
#define sbox(a, b, c, d, e, f, g, h) \
{ \
t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; e = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \
}
// Gather every third bit of x -- bits 1, 4, 7, ..., 31 -- into the top 11
// bits (31..21) of x, preserving their order; all other bits end up zero.
// x must be a modifiable 32-bit unsigned lvalue; it is evaluated several times.
// Wrapped in do { } while (0) so that `puckU32ToThree(v);` is a single
// statement and can be used safely in an unbraced if/else.
#define puckU32ToThree(x) do {\
(x) &= 0x92492492;\
(x) = ((x) | ((x) << 2)) & 0xc30c30c3;\
(x) = ((x) | ((x) << 4)) & 0xf00f00f0;\
(x) = ((x) | ((x) << 8)) & 0xff0000ff;\
(x) = ((x) | ((x) << 16)) & 0xfff00000;\
} while (0)
// Inverse of puckU32ToThree: spread the top 11 bits (31..21) of x back out to
// bit positions 31, 28, ..., 4, 1; bit 20 and below are discarded.
// x must be a modifiable 32-bit unsigned lvalue; it is evaluated several times.
// Wrapped in do { } while (0) so that `unpuckU32ToThree(v);` is a single
// statement and can be used safely in an unbraced if/else.
#define unpuckU32ToThree(x) do {\
(x) &= 0xfff00000;\
(x) = ((x) | ((x) >> 16)) & 0xff0000ff;\
(x) = ((x) | ((x) >> 8)) & 0xf00f00f0;\
(x) = ((x) | ((x) >> 4)) & 0xc30c30c3;\
(x) = ((x) | ((x) >> 2)) & 0x92492492;\
} while (0)
// Pack 6 bytes at `in` into the lower fields of the three packed words
// out[0..2]: a u32 is loaded from in[0..3] (t2) and a u16 from in[4..5] (t1).
// t1 is shifted left by 1 and t2 by 2; the two bits dropped from t2 are taken
// from in[3] (bits 0x80 and 0x40) and placed at bit 10 of out[1] and out[2].
// Each shifted copy is reduced with puckU32ToThree; t1's part lands 11 bits
// below the top and t2's part 22 bits below the top of each output word.
// NOTE(review): taking the top bits from in[3] matches the u32 load only on a
// little-endian host (U32BIG / U16BIG are the identity here) — confirm.
// Uses the caller's t1, t2, t2_64, t2_65, temp1[3] and temp2[3].
#define packU48FormatToThreePacket( out, in) {\
t1 = (u32)U16BIG(*(u16*)(in + 4)); \
t2 = U32BIG(*(u32*)(in)); \
t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6; \
t1 = t1 << 1; \
t2 = t2 << 2; \
temp1[0] = t1; temp1[1] = t1 << 1; temp1[2] = t1 << 2; \
puckU32ToThree(temp1[0]); \
puckU32ToThree(temp1[1]); \
puckU32ToThree(temp1[2]); \
temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; \
puckU32ToThree(temp2[0]); \
puckU32ToThree(temp2[1]); \
puckU32ToThree(temp2[2]); \
out[0] = (temp1[0] >> 11) | (temp2[0] >> 22); \
out[1] = (temp1[1] >> 11) | (((u32)t2_64) << 10) | (temp2[1] >> 22); \
out[2] = (temp1[2] >> 11) | (((u32)t2_65) << 10) | (temp2[2] >> 22); \
}
// Pack 12 bytes at `in` into the three packed state words out[0..2].
// The bytes are loaded as three u32 words (t2 = word 0, t1 = word 1,
// t9 = word 2). t1 is shifted left by 1 and t2 by 2; the bits those shifts
// drop are captured beforehand from in[7] and in[3] (t1_32, t2_64, t2_65).
// For each word, three copies shifted by 0, 1 and 2 are reduced with
// puckU32ToThree (every third bit gathered into the top bits), and the
// results are placed side by side in out[j]: t9's part at the top, t1's part
// 11 bits lower, t2's part 22 bits lower, with the captured bits filling
// bit 10 of out[1], and bits 21 and 10 of out[2].
// NOTE(review): capturing the top bits from in[7] / in[3] matches the u32
// loads only on a little-endian host (U32BIG is the identity here) — confirm.
// `in` is read through a u32* cast, so it is assumed to be suitably aligned.
// Uses the caller's t1, t2, t9, t1_32, t2_64, t2_65 and temp0/temp1/temp2.
#define packU96FormatToThreePacket(out, in) {\
t9 = U32BIG(((u32*)in)[2]); \
t1 = U32BIG(((u32*)in)[1]); \
t2 = U32BIG(((u32*)in)[0]); \
t1_32 = (in[7] & 0x80) >> 7, t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6; \
t1 = t1 << 1; \
t2 = t2 << 2; \
temp0[0] = t9; temp0[1] = t9 << 1; temp0[2] = t9 << 2; \
puckU32ToThree(temp0[0]); \
puckU32ToThree(temp0[1]); \
puckU32ToThree(temp0[2]); \
temp1[0] = t1; temp1[1] = t1 << 1; temp1[2] = t1 << 2; \
puckU32ToThree(temp1[0]); \
puckU32ToThree(temp1[1]); \
puckU32ToThree(temp1[2]); \
temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; \
puckU32ToThree(temp2[0]); \
puckU32ToThree(temp2[1]); \
puckU32ToThree(temp2[2]); \
out[0] = (temp0[0]) | (temp1[0] >> 11) | (temp2[0] >> 22); \
out[1] = (temp0[1]) | (temp1[1] >> 11) | (((u32)t2_64) << 10) | (temp2[1] >> 22); \
out[2] = (temp0[2]) | (((u32)t1_32) << 21) | (temp1[2] >> 11) | (((u32)t2_65) << 10) | (temp2[2] >> 22); \
}
// Inverse of packU96FormatToThreePacket: rebuild 12 bytes at `out` from the
// packed words in[0..2].
// Each packed word is split into its three fields (top field for t9, middle
// for t1, low 10 bits for t2); the single bits stored at bit 10 of in[1],
// and bits 21 and 10 of in[2], are extracted separately (t2_64, t1_32,
// t2_65), which is why the field masks differ slightly between the words.
// Every field is expanded with unpuckU32ToThree, the three expansions of a
// word are merged with shifts of 0, 1 and 2, and the pre-shift applied when
// packing (1 for t1, 2 for t2) is undone.
// The result is stored as three u32 words through u32* casts, so `out` is
// assumed to be suitably aligned — TODO confirm for caller-supplied buffers.
// Uses the caller's t1, t2, t9, t1_32, t2_64, t2_65 and temp0/temp1/temp2.
#define unpackU96FormatToThreePacket( out, in) {\
temp0[0] = in[0] & 0xffe00000; \
temp1[0] = (in[0] & 0x001ffc00) << 11; \
temp2[0] = (in[0] & 0x000003ff) << 22; \
temp0[1] = in[1] & 0xffe00000; \
temp1[1] = (in[1] & 0x001ff800) << 11; \
t2_64 = ((in[1] & 0x00000400) << 21); \
temp2[1] = (in[1] & 0x000003ff) << 22; \
temp0[2] = in[2] & 0xffc00000; \
t1_32 = ((in[2] & 0x00200000) << 10); \
temp1[2] = (in[2] & 0x001ff800) << 11; \
t2_65 = ((in[2] & 0x00000400) << 20); \
temp2[2] = (in[2] & 0x000003ff) << 22; \
unpuckU32ToThree(temp0[0]); \
unpuckU32ToThree(temp0[1]); \
unpuckU32ToThree(temp0[2]); \
t9 = temp0[0] | temp0[1] >> 1 | temp0[2] >> 2; \
unpuckU32ToThree(temp1[0]); \
unpuckU32ToThree(temp1[1]); \
unpuckU32ToThree(temp1[2]); \
t1 = t1_32 | ((temp1[0] | temp1[1] >> 1 | temp1[2] >> 2) >> 1); \
unpuckU32ToThree(temp2[0]); \
unpuckU32ToThree(temp2[1]); \
unpuckU32ToThree(temp2[2]); \
t2 = t2_65 | t2_64 | ((temp2[0] | temp2[1] >> 1 | temp2[2] >> 2) >> 2); \
*(u32*)(out) = U32BIG(t2); \
*(u32*)(out + 4) = U32BIG(t1); \
*(u32*)(out + 8) = U32BIG(t9); \
}
// Row transform on three packed words: (t3, t4, t5) = (t1, t2, t0 <<< 1).
// Inputs are t0..t2, outputs t3..t5 (the parameter names shadow the sbox
// temporaries only inside this macro).
// NOTE(review): the name suggests a 1-bit left rotation of a 96-bit row in
// the three-way interleaved layout — confirm against the specification.
#define U96_BIT_LOTR32_1(t0,t1,t2,t3,t4,t5){\
t3= t1;\
t4 = t2;\
t5 = LOTR32(t0, 1); \
}
// Row transform on three packed words:
// (t3, t4, t5) = (t2 <<< 2, t0 <<< 3, t1 <<< 3).
// NOTE(review): the name suggests an 8-bit left rotation of a 96-bit row in
// the three-way interleaved layout — confirm against the specification.
#define U96_BIT_LOTR32_8(t0,t1,t2,t3,t4,t5){\
t3= LOTR32(t2, 2);\
t4 =LOTR32(t0, 3);\
t5 = LOTR32(t1, 3); \
}
// Row transform on three packed words:
// (t3, t4, t5) = (t1 <<< 18, t2 <<< 18, t0 <<< 19).
// NOTE(review): the name suggests a 55-bit left rotation of a 96-bit row in
// the three-way interleaved layout — confirm against the specification.
#define U96_BIT_LOTR32_55(t0,t1,t2,t3,t4,t5){\
t3= LOTR32(t1, 18); \
t4 = LOTR32(t2, 18);\
t5 = LOTR32(t0, 19); \
}
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
);
int crypto_aead_decrypt(
unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
);
#include"auxFormat.h"
#define aead_RATE (96 / 8)
#define PR0_ROUNDS 76
#define PR_ROUNDS 40
#define PRF_ROUNDS 44
// Round constants for ROUND384, one byte per round (127 entries).
// ROUND384 splits each byte into bits 7..6, bits 5..3 and bits 2..0, which it
// XORs into s[0], s[1] and s[2] respectively.
unsigned char constant7Format[127] = {
/*constant7Format[127]:*/
0x01,0x08,0x40,0x02,0x10,0x80,0x05,0x09,0x48,0x42,0x12,0x90,
0x85,0x0c,0x41,0x0a,0x50,0x82,0x15,0x89,0x4d,0x4b,0x5a,0xd2,
0x97,0x9c,0xc4,0x06,0x11,0x88,0x45,0x0b,0x58,0xc2,0x17,0x99,
0xcd,0x4e,0x53,0x9a,0xd5,0x8e,0x54,0x83,0x1d,0xc9,0x4f,0x5b,
0xda,0xd7,0x9e,0xd4,0x86,0x14,0x81,0x0d,0x49,0x4a,0x52,0x92,
0x95,0x8c,0x44,0x03,0x18,0xc0,0x07,0x19,0xc8,0x47,0x1b,0xd8,
0xc7,0x1e,0xd1,0x8f,0x5c,0xc3,0x1f,0xd9,0xcf,0x5e,0xd3,0x9f,
0xdc,0xc6,0x16,0x91,0x8d,0x4c,0x43,0x1a,0xd0,0x87,0x1c,0xc1,
0x0f,0x59,0xca,0x57,0x9b,0xdd,0xce,0x56,0x93,0x9d,0xcc,0x46,
0x13,0x98,0xc5,0x0e,0x51,0x8a,0x55,0x8b,0x5d,0xcb,0x5f,0xdb,
0xdf,0xde,0xd6,0x96,0x94,0x84,0x04, };
// One round of the 384-bit permutation on the caller's `u32 s[12]`, using
// `u32 s_temp[12]` as scratch (both must be in scope, together with the sbox
// temporaries).
//  1. Round constant: constant7Format[lunNum] is split into bits 7..6,
//     5..3 and 2..0, XORed into s[0], s[1] and s[2].
//  2. sbox is applied to the three word columns (s[j], s[3+j], s[6+j],
//     s[9+j]) for j = 0, 1, 2.
//  3. Row 0 (s[0..2]) is copied back unchanged; rows 1, 2 and 3 go through
//     U96_BIT_LOTR32_1, U96_BIT_LOTR32_8 and U96_BIT_LOTR32_55.
#define ROUND384(lunNum) {\
s[0] ^= (constant7Format[lunNum] >> 6) & 0x3;\
s[1] ^= (constant7Format[lunNum] >> 3) & 0x7;\
s[2] ^= constant7Format[lunNum] & 0x7;\
sbox(s[0], s[3], s[6], s[9] , s_temp[0], s_temp[3], s_temp[6], s_temp[9]);\
sbox(s[1], s[4], s[7], s[10], s_temp[1], s_temp[4], s_temp[7], s_temp[10]);\
sbox(s[2], s[5], s[8], s[11], s_temp[2], s_temp[5], s_temp[8], s_temp[11]);\
s[0] = s_temp[0], s[1] = s_temp[1], s[2] = s_temp[2];\
U96_BIT_LOTR32_1(s_temp[3], s_temp [4], s_temp[ 5], s[3], s[4], s[5]);\
U96_BIT_LOTR32_8(s_temp[6], s_temp [7], s_temp[ 8], s[6], s[7], s[8]);\
U96_BIT_LOTR32_55(s_temp[9], s_temp[10], s_temp[11], s[9], s[10], s[11]);\
}
// AEAD encryption over a 384-bit state (twelve packed u32 words) with a
// 12-byte rate (aead_RATE), a 24-byte nonce and a 24-byte key.
// Writes mlen ciphertext bytes followed by a 24-byte tag to `c`, sets *clen to
// mlen + CRYPTO_ABYTES and always returns 0. `nsec` is not used.
// The locals s, s_temp, t*, temp0..temp2 are referenced by name from the
// pack/unpack, ROUND384 and sbox macros.
int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec, const unsigned char *npub,
const unsigned char *k) {
u8 i;
u32 s[12] = { 0 };
u32 dataFormat[3] = { 0 };
u8 tempData[12] = { 0 };
u32 s_temp[12] = { 0 };
u32 t1, t2, t3, t5, t6, t8, t9, t11;
u32 t1_32, t2_64, t2_65;
u32 temp0[3] = { 0 };
u32 temp1[3] = { 0 };
u32 temp2[3] = { 0 };
*clen = mlen + CRYPTO_ABYTES;
// initialization
// rows 0..1 = nonce[0..23], rows 2..3 = key[0..23], then PR0_ROUNDS rounds
packU96FormatToThreePacket(s, npub);
packU96FormatToThreePacket((s + 3), (npub + 12));
packU96FormatToThreePacket((s + 6), k);
packU96FormatToThreePacket((s + 9), (k + 12));
for (i = 0; i < PR0_ROUNDS; i++) {
ROUND384(i);
}
// process associated data
// (skipped entirely, including padding, when adlen == 0)
if (adlen) {
// rlen = adlen;
// full 12-byte blocks: XOR into row 0, then PR_ROUNDS rounds
while (adlen >= aead_RATE) {
packU96FormatToThreePacket(dataFormat, ad);
s[0] ^= dataFormat[0];
s[1] ^= dataFormat[1];
s[2] ^= dataFormat[2];
for (i = 0; i < PR_ROUNDS; i++) {
ROUND384(i);
}
adlen -= aead_RATE;
ad += aead_RATE;
}
// last block: remaining 0..11 bytes, a 0x01 byte, zero fill
memset(tempData, 0, sizeof(tempData));
memcpy(tempData, ad, adlen);
tempData[adlen] = 0x01;
packU96FormatToThreePacket(dataFormat, tempData);
s[0] ^= dataFormat[0];
s[1] ^= dataFormat[1];
s[2] ^= dataFormat[2];
for (i = 0; i < PR_ROUNDS; i++) {
ROUND384(i);
}
}
// single-bit separator applied after the associated-data phase
s[9] ^= 0x80000000;
if (mlen) {
//rlen = mlen;
// full blocks: XOR plaintext into row 0; the updated row is the
// ciphertext block, unpacked straight into c
while (mlen >= aead_RATE) {
packU96FormatToThreePacket(dataFormat, m);
s[0] ^= dataFormat[0];
s[1] ^= dataFormat[1];
s[2] ^= dataFormat[2];
unpackU96FormatToThreePacket(c, s);
for (i = 0; i < PR_ROUNDS; i++) {
ROUND384(i);
}
mlen -= aead_RATE;
m += aead_RATE;
c += aead_RATE;
}
// last block: padded like the associated data; only the mlen real
// ciphertext bytes are copied out, and no rounds follow here
memset(tempData, 0, sizeof(tempData));
memcpy(tempData, m, mlen);
tempData[mlen] = 0x01;
packU96FormatToThreePacket(dataFormat, tempData);
s[0] ^= dataFormat[0];
s[1] ^= dataFormat[1];
s[2] ^= dataFormat[2];
unpackU96FormatToThreePacket(tempData, s);
memcpy(c, tempData, mlen);
c += mlen;
}
// finalization
for (i = 0; i < PRF_ROUNDS; i++) {
ROUND384(i);
}
// return tag
// tag = 12 bytes unpacked from row 0 followed by 12 bytes from row 1,
// written directly into c
unpackU96FormatToThreePacket(c, s);
unpackU96FormatToThreePacket((c + 12), (s + 3));
return 0;
}
/*
 * AEAD decryption / verification over the 12-word (384-bit) state.
 *
 * m     - output: plaintext, clen - CRYPTO_ABYTES bytes
 * mlen  - output: plaintext length; only written once the input is known
 *         to be long enough to contain a tag
 * nsec  - not used by this function
 * c     - ciphertext followed by the tag, clen bytes in total
 * ad    - associated data, adlen bytes
 * npub  - public nonce; bytes [0,12) and [12,24) are loaded into the state
 * k     - key; bytes [0,12) and [12,24) are loaded into the state
 *
 * Returns 0 when the tag verifies, -1 when clen is shorter than a tag or the
 * tag does not match. Plaintext bytes are written to m before the tag is
 * checked, so callers must discard m when -1 is returned.
 *
 * NOTE: the pack/unpack/ROUND384 macros are defined elsewhere and refer to
 * local variables by name, so the original locals are all kept as they were.
 */
int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen,
unsigned char *nsec, const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub, const unsigned char *k) {
u8 i, j;
u32 s[12] = { 0 };
u32 dataFormat[6] = { 0 };
u32 dataFormat_1[3] = { 0 };
u8 tempData[12] = { 0 };
u8 tempU8[48] = { 0 };
u32 s_temp[12] = { 0 };
u32 t1, t2, t3, t5, t6, t8, t9, t11;
u32 t1_32, t2_64, t2_65;
u32 temp0[3] = { 0 };
u32 temp1[3] = { 0 };
u32 temp2[3] = { 0 };
u32 tag_diff = 0;
// Reject inputs that cannot hold a tag before publishing a length;
// clen - CRYPTO_ABYTES would wrap around for such inputs.
if (clen < CRYPTO_ABYTES)
return -1;
*mlen = clen - CRYPTO_ABYTES;
// Initialization: state = nonce (words 0..5) || key (words 6..11), then PR0_ROUNDS rounds.
packU96FormatToThreePacket(s, npub);
packU96FormatToThreePacket((s + 3), (npub + 12));
packU96FormatToThreePacket((s + 6), k);
packU96FormatToThreePacket((s + 9), (k + 12));
for (i = 0; i < PR0_ROUNDS; i++) {
ROUND384(i);
}
// Associated data: skipped entirely when adlen == 0.
if (adlen) {
while (adlen >= aead_RATE) {
packU96FormatToThreePacket(dataFormat, ad);
s[0] ^= dataFormat[0];
s[1] ^= dataFormat[1];
s[2] ^= dataFormat[2];
for (i = 0; i < PR_ROUNDS; i++) {
ROUND384(i);
}
adlen -= aead_RATE;
ad += aead_RATE;
}
// Last block (possibly empty): remaining bytes, a single 0x01 byte, zero fill.
memset(tempData, 0, sizeof(tempData));
memcpy(tempData, ad, adlen);
tempData[adlen] = 0x01;
packU96FormatToThreePacket(dataFormat, tempData);
s[0] ^= dataFormat[0];
s[1] ^= dataFormat[1];
s[2] ^= dataFormat[2];
for (i = 0; i < PR_ROUNDS; i++) {
ROUND384(i);
}
}
// Flip the top bit of s[9] between the associated-data and message phases.
s[9] ^= 0x80000000;
clen -= CRYPTO_ABYTES;
if (clen) {
// Full blocks: plaintext = s[0..2] ^ ciphertext, then the ciphertext
// block replaces s[0..2].
while (clen >= aead_RATE) {
packU96FormatToThreePacket(dataFormat, c);
dataFormat_1[0] = s[0] ^ dataFormat[0];
dataFormat_1[1] = s[1] ^ dataFormat[1];
dataFormat_1[2] = s[2] ^ dataFormat[2];
unpackU96FormatToThreePacket(m, dataFormat_1);
s[0] = dataFormat[0];
s[1] = dataFormat[1];
s[2] = dataFormat[2];
for (i = 0; i < PR_ROUNDS; i++) {
ROUND384(i);
}
clen -= aead_RATE;
m += aead_RATE;
c += aead_RATE;
}
// Last block: work on the byte image of the state. The first clen bytes
// are replaced by ciphertext and the 0x01 padding byte is XORed in at
// offset clen.
unpackU96FormatToThreePacket(tempU8, s);
for (i = 0; i < clen; ++i, ++m, ++c)
{
*m = tempU8[i] ^ *c;
tempU8[i] = *c;
}
tempU8[i] ^= 0x01;
packU96FormatToThreePacket(s, tempU8);
}
// Finalization: PRF_ROUNDS rounds.
for (i = 0; i < PRF_ROUNDS; i++) {
ROUND384(i);
}
// Tag check: c now points at the received tag. All six words are folded
// into one accumulator so the comparison does not stop at the first
// differing word.
packU96FormatToThreePacket(dataFormat, c);
packU96FormatToThreePacket((dataFormat + 3), (c + 12));
for (i = 0; i < 6; i++) {
tag_diff |= dataFormat[i] ^ s[i];
}
if (tag_diff != 0) {
return -1;
}
return 0;
}
#define CRYPTO_KEYBYTES 32
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 32
#define CRYPTO_ABYTES 32
#define CRYPTO_NOOVERLAP 1
#include"crypto_aead.h"
#include"api.h"
#include <string.h>
/* Byte-order hook for 32-bit loads/stores; defined as the identity here. */
#define U32BIG(x) (x)
/* Element count of a true array (not valid for pointers). */
#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0])))
/* Rotate the 32-bit value x left by n bits; n must be in 1..31. */
#define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n))))
/*
 * Bitwise S-box on four input words (a, b, c, d) producing four output
 * words (e, f, g, h). Uses the caller's locals t1, t2, t3, t5, t6, t8, t9
 * and t11 as scratch. Arguments are not parenthesised in the expansion, so
 * pass plain lvalues only.
 */
#define sbox(a, b, c, d, e, f, g, h) \
{ \
t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; e = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \
}
/* Short fixed-purpose integer aliases used throughout this implementation. */
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned long long u64;
/* Declaration only; no definition is visible in this part of the file (presumably a debug dump helper -- confirm). */
void printU8(char name[], u8 var[], long len, int offset);
/*
 * puck32(in): in-place bit permutation of a 32-bit lvalue built from four
 * delta-swap steps (distances 1, 2, 4, 8 with masks 0x22222222, 0x0C0C0C0C,
 * 0x00F000F0, 0x0000FF00). Uses the caller's local t9 as scratch.
 * NOTE(review): this matches the usual "unshuffle" network that gathers
 * even-indexed bits into the low half and odd-indexed bits into the high
 * half -- confirm against the specification.
 */
#define puck32(in)\
{\
t9 = (in ^ (in >> 1)) & 0x22222222; in ^= t9 ^ (t9 << 1);\
t9 = (in ^ (in >> 2)) & 0x0C0C0C0C; in ^= t9 ^ (t9 << 2);\
t9 = (in ^ (in >> 4)) & 0x00F000F0; in ^= t9 ^ (t9 << 4);\
t9 = (in ^ (in >> 8)) & 0x0000FF00; in ^= t9 ^ (t9 << 8);\
}
/*
 * unpuck32(t0): the same four delta-swap steps as puck32 applied in the
 * opposite order (distances 8, 4, 2, 1), i.e. the inverse permutation.
 * Uses the caller's local t9 as scratch.
 */
#define unpuck32(t0){\
t9 = (t0 ^ (t0 >> 8)) & 0x0000FF00, t0 ^= t9 ^ (t9 << 8); \
t9 = (t0 ^ (t0 >> 4)) & 0x00F000F0, t0 ^= t9 ^ (t9 << 4); \
t9 = (t0 ^ (t0 >> 2)) & 0x0C0C0C0C, t0 ^= t9 ^ (t9 << 2); \
t9 = (t0 ^ (t0 >> 1)) & 0x22222222, t0 ^= t9 ^ (t9 << 1); \
}
/*
 * packU128FormatToFourPacket(out, in): convert 16 bytes at `in` into four
 * 32-bit words out[0..3].
 *
 * The input is read as four 32-bit words through a u32* cast (so `in` must
 * be suitably aligned on targets that require it). Each word is run through
 * puck32 twice, then the bytes are regrouped: out[k] holds byte k of each
 * of the four permuted words (first input word in bits 7..0, fourth in bits
 * 31..24). Uses the caller's locals t8, t1, t2, t3 and t9 as scratch.
 * `out` and `in` are pasted unparenthesised, so pointer expressions must be
 * wrapped in parentheses by the caller.
 */
#define packU128FormatToFourPacket(out,in) {\
t8 = U32BIG(((u32*)in)[0]); \
t1 = U32BIG(((u32*)in)[1]); \
t2 = U32BIG(((u32*)in)[2]); \
t3 = U32BIG(((u32*)in)[3]); \
puck32(t8); puck32(t8); \
puck32(t1); puck32(t1); \
puck32(t2); puck32(t2); \
puck32(t3); puck32(t3); \
out[3] = t3 & 0xff000000 | ((t2 >> 8) & 0x00ff0000) | ((t1 >> 16) & 0x0000ff00) | (t8 >> 24); \
out[2] = ((t3 << 8) & 0xff000000) | (t2 & 0x00ff0000) | ((t1 >> 8) & 0x0000ff00) | ((t8 >> 16) & 0x000000ff); \
out[1] = ((t3 << 16) & 0xff000000) | ((t2 << 8) & 0x00ff0000) | (t1 & 0x0000ff00) | ((t8 >> 8) & 0x000000ff); \
out[0] = ((t3 << 24) & 0xff000000) | ((t2 << 16) & 0x00ff0000) | ((t1 << 8) & 0x0000ff00) | (t8 & 0x000000ff); \
}
/*
 * unpackU128FormatToFourPacket(out, in): inverse of the pack macro; turns
 * four packed words at `in` back into 16 bytes at `out`.
 *
 * Side effect: the four input words are first copied into the caller's
 * local array `dataFormat`, overwriting whatever it held. The same byte
 * regrouping is applied, each word goes through unpuck32 twice, and the
 * results are stored through a u32* cast on `out`. Uses the caller's locals
 * t8, t1, t2, t3 and t9 as scratch.
 */
#define unpackU128FormatToFourPacket( out, in) {\
memcpy(dataFormat, in, sizeof(unsigned int) * 4); \
t3 = dataFormat[3] & 0xff000000 | ((dataFormat[2] >> 8) & 0x00ff0000) | ((dataFormat[1] >> 16) & 0x0000ff00) | (dataFormat[0] >> 24); \
t2 = ((dataFormat[3] << 8) & 0xff000000) | (dataFormat[2] & 0x00ff0000) | ((dataFormat[1] >> 8) & 0x0000ff00) | ((dataFormat[0] >> 16) & 0x000000ff); \
t1 = ((dataFormat[3] << 16) & 0xff000000) | ((dataFormat[2] << 8) & 0x00ff0000) | (dataFormat[1] & 0x0000ff00) | ((dataFormat[0] >> 8) & 0x000000ff); \
t8 = ((dataFormat[3] << 24) & 0xff000000) | ((dataFormat[2] << 16) & 0x00ff0000) | ((dataFormat[1] << 8) & 0x0000ff00) | (dataFormat[0] & 0x000000ff); \
unpuck32(t8); unpuck32(t8); \
unpuck32(t1); unpuck32(t1); \
unpuck32(t2); unpuck32(t2); \
unpuck32(t3); unpuck32(t3); \
((u32*)out)[0] = U32BIG(t8); \
((u32*)out)[1] = U32BIG(t1); \
((u32*)out)[2] = U32BIG(t2); \
((u32*)out)[3] = U32BIG(t3); \
}
/*
 * packU64FormatToFourPacket(out, in): 8-byte variant of the pack macro.
 * Reads two 32-bit words from `in` (u32* cast), applies puck32 twice to
 * each, and writes out[k] = byte k of the first word in bits 7..0 and byte
 * k of the second word in bits 15..8; the upper 16 bits of every out[k]
 * are zero. Uses the caller's locals t1, t2 and t9 as scratch.
 */
#define packU64FormatToFourPacket( out, in) {\
t1 = U32BIG(((u32*)in)[0]); \
t2 = U32BIG(((u32*)in)[1]); \
puck32(t1); \
puck32(t1); \
puck32(t2); \
puck32(t2); \
out[3] = ((t2 >> 16) & 0x0000ff00) | ((t1 >> 24)); \
out[2] = ((t2 >> 8) & 0x0000ff00) | ((t1 >> 16) & 0x000000ff); \
out[1] = (t2 & 0x0000ff00) | ((t1 >> 8) & 0x000000ff); \
out[0] = ((t2 << 8) & 0x0000ff00) | (t1 & 0x000000ff); \
}
/*
 * Row-rotation helpers. Each takes four input words (t0..t3) and writes
 * four output words (t4..t7); inputs are not modified.
 * NOTE(review): the names and the word shuffling suggest these rotate a
 * 128-bit row, stored as four interleaved 32-bit slices, by 1, 16 and 25
 * bit positions -- confirm against the specification.
 */
/* Outputs: (rotl(t3,1), t0, t1, t2). */
#define BIT_LOTR32_1(t0,t1,t2,t3,t4,t5,t6,t7){\
t4= LOTR32(t3, 1);\
t5 = t0;\
t6 = t1; \
t7 = t2; \
}
/* Outputs: every word rotated left by 4, order unchanged. */
#define BIT_LOTR32_16(t0,t1,t2,t3,t4,t5,t6,t7){\
t4= LOTR32(t0, 4);\
t5 = LOTR32(t1, 4);\
t6 = LOTR32(t2, 4); \
t7 = LOTR32(t3, 4); \
}
/* Outputs: (rotl(t3,7), rotl(t0,6), rotl(t1,6), rotl(t2,6)). */
#define BIT_LOTR32_25(t0,t1,t2,t3,t4,t5,t6,t7){\
t4= LOTR32(t3, 7);\
t5 = LOTR32(t0, 6);\
t6 = LOTR32(t1, 6); \
t7 = LOTR32(t2, 6); \
}
/*
 * ROUND512(arr, lunNum): one round of the permutation on the 16-word state.
 *
 * Expects in the enclosing scope: s[16] (state), s_temp[16] (scratch) and
 * the sbox temporaries t1, t2, t3, t5, t6, t8, t9, t11.
 *
 * Steps, in order:
 *   1. The round constant arr[lunNum] is split into four 2-bit fields
 *      (bits 7..6, 5..4, 3..2, 1..0) that are XORed into s[3], s[2], s[1]
 *      and s[0] respectively.
 *   2. sbox is applied to the word groups (s[j], s[j+4], s[j+8], s[j+12])
 *      for j = 0..3, with results written to the same indices of s_temp.
 *   3. s_temp[0..3] is copied back unchanged; s[4..7], s[8..11] and
 *      s[12..15] are produced from s_temp by BIT_LOTR32_1, BIT_LOTR32_16
 *      and BIT_LOTR32_25.
 */
#define ROUND512( arr,lunNum) {\
s[3] ^= (arr[lunNum] >> 6) & 0x3;\
s[2] ^= (arr[lunNum] >> 4) & 0x3;\
s[1] ^= (arr[lunNum] >> 2) & 0x3;\
s[0] ^= arr[lunNum] & 0x3;\
sbox(s[0], s[4], s[8], s[12], s_temp[0], s_temp[4], s_temp[8], s_temp[12]);\
sbox(s[1], s[5], s[9], s[13], s_temp[1], s_temp[5], s_temp[9], s_temp[13]);\
sbox(s[2], s[6], s[10], s[14], s_temp[2], s_temp[6], s_temp[10], s_temp[14]);\
sbox(s[3], s[7], s[11], s[15], s_temp[3], s_temp[7], s_temp[11], s_temp[15]);\
s[0] = s_temp[0], s[1] = s_temp[1], s[2] = s_temp[2], s[3] = s_temp[3];\
BIT_LOTR32_1(s_temp[4], s_temp[5], s_temp[6], s_temp[7], s[4], s[5], s[6], s[7]);\
BIT_LOTR32_16(s_temp[8], s_temp[9], s_temp[10], s_temp[11], s[8], s[9], s[10], s[11]);\
BIT_LOTR32_25(s_temp[12], s_temp[13], s_temp[14], s_temp[15], s[12], s[13], s[14], s[15]);\
}
/*
 * Encrypt and authenticate.
 *   c, clen  - output buffer and output length (message length plus
 *              CRYPTO_ABYTES tag bytes)
 *   m, mlen  - plaintext
 *   ad, adlen - associated data (authenticated, not encrypted)
 *   nsec     - secret message number; not used by the definition in this file
 *   npub     - public nonce
 *   k        - key
 * The definition in this file always returns 0.
 */
int crypto_aead_encrypt(
unsigned char *c, unsigned long long *clen,
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec,
const unsigned char *npub,
const unsigned char *k
);
/*
 * Verify and decrypt.
 *   m, mlen  - output buffer and output length (clen minus CRYPTO_ABYTES)
 *   nsec     - secret message number; not used by the definition in this file
 *   c, clen  - ciphertext followed by the tag
 *   ad, adlen - associated data
 *   npub     - public nonce
 *   k        - key
 * The definition in this file returns 0 on success and -1 when the input is
 * shorter than a tag or the tag does not match.
 */
int crypto_aead_decrypt(
unsigned char *m, unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c, unsigned long long clen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k
);
#include"auxFormat.h"
#define aead_RATE (128 / 8)
#define PR0_ROUNDS 100
#define PR_ROUNDS 52
#define PRF_ROUNDS 56
/*
 * Round-constant table consumed by ROUND512 (indexed by round number).
 * The 127 entries are the values 0x01..0x7f, each appearing exactly once.
 * Laid out eight per row; the comment on each row gives the index of its
 * first entry.
 */
unsigned char constant7Format_aead[127] = {
	/*   0 */ 0x01, 0x04, 0x10, 0x40, 0x02, 0x08, 0x21, 0x05,
	/*   8 */ 0x14, 0x50, 0x42, 0x0a, 0x29, 0x24, 0x11, 0x44,
	/*  16 */ 0x12, 0x48, 0x23, 0x0d, 0x35, 0x55, 0x56, 0x5a,
	/*  24 */ 0x6b, 0x2e, 0x38, 0x60, 0x03, 0x0c, 0x31, 0x45,
	/*  32 */ 0x16, 0x58, 0x63, 0x0f, 0x3d, 0x74, 0x53, 0x4e,
	/*  40 */ 0x3b, 0x6c, 0x32, 0x49, 0x27, 0x1d, 0x75, 0x57,
	/*  48 */ 0x5e, 0x7b, 0x6e, 0x3a, 0x68, 0x22, 0x09, 0x25,
	/*  56 */ 0x15, 0x54, 0x52, 0x4a, 0x2b, 0x2c, 0x30, 0x41,
	/*  64 */ 0x06, 0x18, 0x61, 0x07, 0x1c, 0x71, 0x47, 0x1e,
	/*  72 */ 0x79, 0x66, 0x1b, 0x6d, 0x36, 0x59, 0x67, 0x1f,
	/*  80 */ 0x7d, 0x76, 0x5b, 0x6f, 0x3e, 0x78, 0x62, 0x0b,
	/*  88 */ 0x2d, 0x34, 0x51, 0x46, 0x1a, 0x69, 0x26, 0x19,
	/*  96 */ 0x65, 0x17, 0x5c, 0x73, 0x4f, 0x3f, 0x7c, 0x72,
	/* 104 */ 0x4b, 0x2f, 0x3c, 0x70, 0x43, 0x0e, 0x39, 0x64,
	/* 112 */ 0x13, 0x4c, 0x33, 0x4d, 0x37, 0x5d, 0x77, 0x5f,
	/* 120 */ 0x7f, 0x7e, 0x7a, 0x6a, 0x2a, 0x28, 0x20,
};
/*
 * AEAD encryption over the 16-word (512-bit) state.
 *
 * Writes mlen ciphertext bytes followed by a 32-byte tag to c and stores
 * mlen + CRYPTO_ABYTES in *clen. nsec is not used. Always returns 0.
 *
 * The names s, s_temp, dataFormat and t1..t11 are referenced from inside
 * the pack/unpack/ROUND512 macros and therefore must keep these exact names.
 */
int crypto_aead_encrypt(
	unsigned char *c, unsigned long long *clen,
	const unsigned char *m, unsigned long long mlen,
	const unsigned char *ad, unsigned long long adlen,
	const unsigned char *nsec,
	const unsigned char *npub,
	const unsigned char *k
) {
	u32 t1, t2, t3, t5, t6, t8, t9, t11;
	u32 s[16] = { 0 };
	u32 s_temp[16] = { 0 };
	u32 dataFormat[4] = { 0 };
	u8 lastBlock[16] = { 0 };
	u32 rnd, w;

	*clen = mlen + CRYPTO_ABYTES;

	/* Load nonce (words 0..7) and key (words 8..15), then mix. */
	packU128FormatToFourPacket(s, npub);
	packU128FormatToFourPacket((s + 4), (npub + 16));
	packU128FormatToFourPacket((s + 8), k);
	packU128FormatToFourPacket((s + 12), (k + 16));
	for (rnd = 0; rnd < PR0_ROUNDS; rnd++) {
		ROUND512(constant7Format_aead, rnd);
	}

	/* Associated data phase; nothing happens for empty associated data. */
	if (adlen != 0) {
		for (; adlen >= aead_RATE; adlen -= aead_RATE, ad += aead_RATE) {
			packU128FormatToFourPacket(dataFormat, ad);
			for (w = 0; w < 4; w++) {
				s[w] ^= dataFormat[w];
			}
			for (rnd = 0; rnd < PR_ROUNDS; rnd++) {
				ROUND512(constant7Format_aead, rnd);
			}
		}
		/* Trailing bytes, then 0x01, then zeros. */
		memset(lastBlock, 0, sizeof(lastBlock));
		memcpy(lastBlock, ad, adlen);
		lastBlock[adlen] = 0x01;
		packU128FormatToFourPacket(dataFormat, lastBlock);
		for (w = 0; w < 4; w++) {
			s[w] ^= dataFormat[w];
		}
		for (rnd = 0; rnd < PR_ROUNDS; rnd++) {
			ROUND512(constant7Format_aead, rnd);
		}
	}

	/* Phase separator between associated data and message. */
	s[15] ^= 0x80000000;

	/* Message phase; nothing happens for an empty message. */
	if (mlen != 0) {
		for (; mlen >= aead_RATE; mlen -= aead_RATE, m += aead_RATE, c += aead_RATE) {
			packU128FormatToFourPacket(dataFormat, m);
			for (w = 0; w < 4; w++) {
				s[w] ^= dataFormat[w];
			}
			/* The updated rate words are the ciphertext block. */
			unpackU128FormatToFourPacket(c, s);
			for (rnd = 0; rnd < PR_ROUNDS; rnd++) {
				ROUND512(constant7Format_aead, rnd);
			}
		}
		/* Padded final block: emit only the mlen real bytes, no rounds. */
		memset(lastBlock, 0, sizeof(lastBlock));
		memcpy(lastBlock, m, mlen);
		lastBlock[mlen] = 0x01;
		packU128FormatToFourPacket(dataFormat, lastBlock);
		for (w = 0; w < 4; w++) {
			s[w] ^= dataFormat[w];
		}
		unpackU128FormatToFourPacket(lastBlock, s);
		memcpy(c, lastBlock, mlen);
		c += mlen;
	}

	/* Finalization rounds, then state words 0..7 become the tag. */
	for (rnd = 0; rnd < PRF_ROUNDS; rnd++) {
		ROUND512(constant7Format_aead, rnd);
	}
	unpackU128FormatToFourPacket(c, s);
	unpackU128FormatToFourPacket((c + 16), (s + 4));
	return 0;
}
/*
 * AEAD decryption / verification over the 16-word (512-bit) state.
 *
 * m     - output: plaintext, clen - CRYPTO_ABYTES bytes
 * mlen  - output: plaintext length (written only when clen can hold a tag)
 * nsec  - not used
 * c     - ciphertext followed by the 32-byte tag
 * ad    - associated data, adlen bytes
 * npub  - 32-byte public nonce
 * k     - 32-byte key
 *
 * Returns 0 when the tag verifies, -1 when clen is shorter than a tag or
 * the tag does not match. Plaintext is written to m before the tag is
 * checked, so callers must discard m when -1 is returned.
 *
 * The names s, s_temp, dataFormat and t1..t11 are referenced from inside
 * the pack/unpack/ROUND512 macros and must keep these exact names. Note
 * that the unpack macro overwrites dataFormat, which is why the ciphertext
 * block is kept in dataFormat_2 while decrypting.
 */
int crypto_aead_decrypt(
	unsigned char *m, unsigned long long *mlen,
	unsigned char *nsec,
	const unsigned char *c, unsigned long long clen,
	const unsigned char *ad, unsigned long long adlen,
	const unsigned char *npub,
	const unsigned char *k
){
	u32 s_temp[16] = { 0 };
	u32 t1, t2, t3, t5, t6, t8, t9, t11;
	u8 i;
	u32 s[16] = { 0 };
	u32 dataFormat[4] = { 0 };
	u32 dataFormat_1[4] = { 0 };
	u32 dataFormat_2[4] = { 0 };
	u8 tempData[16] = { 0 };
	u8 tempU8[64] = { 0 };
	u32 tag_diff = 0;

	if (clen < CRYPTO_ABYTES)
		return -1;
	*mlen = clen - CRYPTO_ABYTES;

	/* Load nonce (words 0..7) and key (words 8..15), then mix. */
	packU128FormatToFourPacket(s, npub);
	packU128FormatToFourPacket((s + 4), (npub + 16));
	packU128FormatToFourPacket((s + 8), k);
	packU128FormatToFourPacket((s + 12), (k + 16));
	for (i = 0; i < PR0_ROUNDS; i++) {
		ROUND512(constant7Format_aead, i);
	}

	/* Associated data phase; nothing happens for empty associated data. */
	if (adlen) {
		while (adlen >= aead_RATE) {
			packU128FormatToFourPacket(dataFormat, ad);
			s[0] ^= dataFormat[0];
			s[1] ^= dataFormat[1];
			s[2] ^= dataFormat[2];
			s[3] ^= dataFormat[3];
			for (i = 0; i < PR_ROUNDS; i++) {
				ROUND512(constant7Format_aead, i);
			}
			adlen -= aead_RATE;
			ad += aead_RATE;
		}
		/* Trailing bytes, then 0x01, then zeros. */
		memset(tempData, 0, sizeof(tempData));
		memcpy(tempData, ad, adlen * sizeof(unsigned char));
		tempData[adlen] = 0x01;
		packU128FormatToFourPacket(dataFormat, tempData);
		s[0] ^= dataFormat[0];
		s[1] ^= dataFormat[1];
		s[2] ^= dataFormat[2];
		s[3] ^= dataFormat[3];
		for (i = 0; i < PR_ROUNDS; i++) {
			ROUND512(constant7Format_aead, i);
		}
	}

	/* Phase separator between associated data and message. */
	s[15] ^= 0x80000000;

	/* Strip the tag: what remains is the ciphertext body. The tag length
	 * is CRYPTO_ABYTES (the key length only happens to be equal). */
	clen = clen - CRYPTO_ABYTES;
	if (clen) {
		while (clen >= aead_RATE) {
			packU128FormatToFourPacket(dataFormat_2, c);
			dataFormat_1[0] = s[0] ^ dataFormat_2[0];
			dataFormat_1[1] = s[1] ^ dataFormat_2[1];
			dataFormat_1[2] = s[2] ^ dataFormat_2[2];
			dataFormat_1[3] = s[3] ^ dataFormat_2[3];
			unpackU128FormatToFourPacket(m, dataFormat_1);
			/* The ciphertext block replaces the rate words. */
			s[0] = dataFormat_2[0];
			s[1] = dataFormat_2[1];
			s[2] = dataFormat_2[2];
			s[3] = dataFormat_2[3];
			for (i = 0; i < PR_ROUNDS; i++) {
				ROUND512(constant7Format_aead, i);
			}
			clen -= aead_RATE;
			m += aead_RATE;
			c += aead_RATE;
		}
		/* Final partial block, handled on the byte image of the rate
		 * words: the first clen bytes become ciphertext and the 0x01
		 * padding byte is XORed in at offset clen. */
		unpackU128FormatToFourPacket(tempU8, s);
		for (i = 0; i < clen; ++i, ++m, ++c)
		{
			*m = tempU8[i] ^ *c;
			tempU8[i] = *c;
		}
		tempU8[i] ^= 0x01;
		packU128FormatToFourPacket(s, tempU8);
	}

	/* Finalization rounds. */
	for (i = 0; i < PRF_ROUNDS; i++) {
		ROUND512(constant7Format_aead, i);
	}

	/* Tag check: c now points at the received tag. Differences over all
	 * eight words are accumulated so the comparison does not stop at the
	 * first mismatching word. */
	packU128FormatToFourPacket(dataFormat, c);
	packU128FormatToFourPacket(dataFormat_1, (c + 16));
	for (i = 0; i < 4; i++) {
		tag_diff |= dataFormat[i] ^ s[i];
		tag_diff |= dataFormat_1[i] ^ s[i + 4];
	}
	if (tag_diff != 0) {
		return -1;
	}
	return 0;
}
\ No newline at end of file
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
/*
* Date: 29 November 2018
* Contact: Thomas Peyrin - thomas.peyrin@gmail.com
* Mustafa Khairallah - mustafam001@e.ntu.edu.sg
*/
#include "crypto_aead.h"
#include "api.h"
#include "skinny.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Build a 16-byte padded block mp from the first len8 bytes of m.
 *
 *   len8 == 0        : mp is all zero
 *   0 < len8 < 16    : mp = m[0..len8) || zeros, with mp[15] = len8
 *   len8 >= 16       : mp = m[0..16) (no length byte)
 *
 * Only the first min(len8, 16) bytes of m are read, so m may be a buffer
 * of exactly that size. Access is byte-wise, so neither pointer needs any
 * particular alignment and the result does not depend on host byte order.
 * A negative len8 is treated as 0.
 */
void pad (const unsigned char* m, unsigned char* mp, int len8) {
    int n = len8;
    int i;

    if (n < 0) {
        n = 0;
    } else if (n > 16) {
        n = 16;
    }
    for (i = 0; i < n; i++) {
        mp[i] = m[i];
    }
    for (; i < 16; i++) {
        mp[i] = 0;
    }
    /* Partial, non-empty blocks carry their length in the last byte. */
    if (n > 0 && n < 16) {
        mp[15] = (unsigned char)n;
    }
}
/*
 * Apply the byte-wise map G to the 16-byte state s and store the result in c.
 *
 * For every byte b: result = (b >> 1) with bit 7 set to (bit7(b) XOR bit0(b)).
 * s is not modified. Access is byte-wise, so neither pointer needs any
 * particular alignment and the result does not depend on host byte order.
 */
void g8A (unsigned char* s, unsigned char* c) {
    int i;

    for (i = 0; i < 16; i++) {
        unsigned char b = s[i];
        c[i] = (unsigned char)((b >> 1) ^ ((b ^ (b << 7)) & 0x80));
    }
}
/*
 * Tag-output variant of the G map: writes G(s) for the 16-byte state s to c.
 *
 * For every byte b: result = (b >> 1) with bit 7 set to (bit7(b) XOR bit0(b)).
 * The tag position inside the ciphertext buffer is generally not word
 * aligned, and the state buffer carries no alignment guarantee either, so
 * both sides are accessed one byte at a time.
 */
void g8A_for_Tag_Generation (unsigned char* s, unsigned char* c) {
    int i;

    for (i = 0; i < 16; i++) {
        unsigned char b = s[i];
        c[i] = (unsigned char)((b >> 1) ^ ((b ^ (b << 7)) & 0x80));
    }
}
/*
 * Absorb one full 16-byte associated-data block: s[i] ^= m[i] for i = 0..15.
 * Byte-wise access: no alignment requirement on either pointer.
 */
void rho_ad_eqov16 (
    const unsigned char* m,
    unsigned char* s) {
    int i;

    for (i = 0; i < 16; i++) {
        s[i] ^= m[i];
    }
}
/*
 * Absorb a final associated-data block of len8 bytes (0..16) into s.
 *
 * Equivalent to XORing s with the padded block: s[i] ^= m[i] for i < len8,
 * and, for 0 < len8 < 16, s[15] ^= len8 (the length byte of the padding).
 * len8 == 0 leaves s unchanged. Only the first len8 bytes of m are read;
 * values outside 0..16 are clamped. Byte-wise access: no alignment
 * requirement on either pointer.
 */
void rho_ad_ud16 (
    const unsigned char* m,
    unsigned char* s,
    int len8) {
    int n = len8;
    int i;

    if (n < 0) {
        n = 0;
    } else if (n > 16) {
        n = 16;
    }
    for (i = 0; i < n; i++) {
        s[i] ^= m[i];
    }
    if (n > 0 && n < 16) {
        s[15] ^= (unsigned char)n;
    }
}
/*
 * Encrypt one full 16-byte block and absorb it into the state.
 *
 * For every byte i: c[i] = G(s[i]) ^ m[i] and s[i] ^= m[i], where G is the
 * byte map (b >> 1) with bit 7 set to (bit7(b) XOR bit0(b)), evaluated on
 * the state before it is updated. Byte-wise access: no alignment
 * requirement on any pointer.
 */
void rho_eqov16 (
    const unsigned char* m,
    unsigned char* c,
    unsigned char* s) {
    int i;

    for (i = 0; i < 16; i++) {
        unsigned char b = s[i];
        unsigned char g = (unsigned char)((b >> 1) ^ ((b ^ (b << 7)) & 0x80));
        unsigned char mi = m[i];

        s[i] = (unsigned char)(b ^ mi);
        c[i] = (unsigned char)(g ^ mi);
    }
}
/*
 * Encrypt a final block of len8 bytes (0..16) and absorb it into the state.
 *
 * For i < len8: c[i] = G(s[i]) ^ m[i] and s[i] ^= m[i], where G is the byte
 * map (b >> 1) with bit 7 set to (bit7(b) XOR bit0(b)), evaluated on the
 * state before it is updated. For 0 < len8 < 16 the padding length byte is
 * absorbed as well: s[15] ^= len8. len8 == 0 changes nothing.
 *
 * Exactly len8 bytes of m are read and exactly len8 bytes of c are written;
 * bytes of c past the block are left untouched. Values outside 0..16 are
 * clamped. Byte-wise access: no alignment requirement on any pointer.
 */
void rho_ud16 (
    const unsigned char* m,
    unsigned char* c,
    unsigned char* s,
    int len8) {
    int n = len8;
    int i;

    if (n < 0) {
        n = 0;
    } else if (n > 16) {
        n = 16;
    }
    for (i = 0; i < n; i++) {
        unsigned char b = s[i];
        unsigned char g = (unsigned char)((b >> 1) ^ ((b ^ (b << 7)) & 0x80));
        unsigned char mi = m[i];

        s[i] = (unsigned char)(b ^ mi);
        c[i] = (unsigned char)(g ^ mi);
    }
    if (n > 0 && n < 16) {
        s[15] ^= (unsigned char)n;
    }
}
/*
 * Decrypt one full 16-byte block and absorb the recovered plaintext.
 *
 * For every byte i: m[i] = G(s[i]) ^ c[i] and s[i] ^= m[i], where G is the
 * byte map (b >> 1) with bit 7 set to (bit7(b) XOR bit0(b)), evaluated on
 * the state before it is updated. Byte-wise access: no alignment
 * requirement on any pointer.
 */
void irho_eqov16 (
    unsigned char* m,
    const unsigned char* c,
    unsigned char* s) {
    int i;

    for (i = 0; i < 16; i++) {
        unsigned char b = s[i];
        unsigned char g = (unsigned char)((b >> 1) ^ ((b ^ (b << 7)) & 0x80));
        unsigned char plain = (unsigned char)(g ^ c[i]);

        m[i] = plain;
        s[i] = (unsigned char)(b ^ plain);
    }
}
/*
 * Decrypt a final block of len8 bytes (0..16) and absorb the recovered
 * plaintext into the state.
 *
 * For i < len8: m[i] = G(s[i]) ^ c[i] and s[i] ^= m[i], where G is the byte
 * map (b >> 1) with bit 7 set to (bit7(b) XOR bit0(b)), evaluated on the
 * state before it is updated. For 0 < len8 < 16 the padding length byte is
 * absorbed as well: s[15] ^= len8. len8 == 0 changes nothing.
 *
 * Exactly len8 bytes of c are read and exactly len8 bytes of m are written,
 * so m only needs room for the real plaintext; nothing past the block is
 * touched. Values outside 0..16 are clamped. Byte-wise access: no alignment
 * requirement on any pointer.
 */
void irho_ud16 (
    unsigned char* m,
    const unsigned char* c,
    unsigned char* s,
    int len8) {
    int n = len8;
    int i;

    if (n < 0) {
        n = 0;
    } else if (n > 16) {
        n = 16;
    }
    for (i = 0; i < n; i++) {
        unsigned char b = s[i];
        unsigned char g = (unsigned char)((b >> 1) ^ ((b ^ (b << 7)) & 0x80));
        unsigned char plain = (unsigned char)(g ^ c[i]);

        m[i] = plain;
        s[i] = (unsigned char)(b ^ plain);
    }
    if (n > 0 && n < 16) {
        s[15] ^= (unsigned char)n;
    }
}
/*
 * Reset the 8-byte counter buffer: CNT[0] = 1, CNT[1..7] = 0.
 * Byte-wise stores: CNT needs no particular alignment and the layout does
 * not depend on host byte order.
 */
void reset_lfsr_gf56 (unsigned char* CNT) {
    int i;

    CNT[0] = 0x01;
    for (i = 1; i < 8; i++) {
        CNT[i] = 0x00;
    }
}
/*
 * Advance the counter by one step.
 *
 * The 8 bytes of CNT are treated as one little-endian 64-bit value that is
 * shifted left by one bit; if bit 7 of CNT[6] was set before the shift,
 * 0x95 is XORed into the lowest byte. CNT[7] takes part in the shift (it
 * receives the bit shifted out of CNT[6]).
 *
 * Byte-wise access: CNT needs no particular alignment and the result does
 * not depend on host byte order.
 */
void lfsr_gf56 (unsigned char* CNT) {
    unsigned char feedback = (CNT[6] & 0x80) ? 0x95 : 0x00;
    int i;

    /* Walk from the top byte down so each step still sees the old lower byte. */
    for (i = 7; i > 0; i--) {
        CNT[i] = (unsigned char)((CNT[i] << 1) | (CNT[i - 1] >> 7));
    }
    CNT[0] = (unsigned char)((CNT[0] << 1) ^ feedback);
}
/*
 * Run the tweakable block cipher on the state s.
 *
 * The domain byte D is stored in CNT[7] first, then the cipher routine
 * selected in p_skinny_ctrl is invoked with the state, the control block,
 * the counter CNT, the block T and the key k.
 */
void block_cipher(
    unsigned char* s,
    const unsigned char* k, unsigned char* T,
    unsigned char* CNT, unsigned char D,
    skinny_ctrl* p_skinny_ctrl)
{
    /* The last counter byte carries the domain separator. */
    CNT[7] = D;

    (*p_skinny_ctrl->func_skinny_128_384_enc)(s, p_skinny_ctrl, CNT, T, k);
}
/*
 * Encrypt the state s with the nonce N in the T slot of block_cipher,
 * under counter CNT and domain byte D.
 */
void nonce_encryption (
    const unsigned char* N,
    unsigned char* CNT,
    unsigned char*s, const unsigned char* k,
    unsigned char D,
    skinny_ctrl* p_skinny_ctrl)
{
    /* block_cipher takes a non-const pointer for this argument. */
    unsigned char* tweak = (unsigned char*)N;

    block_cipher(s, k, tweak, CNT, D, p_skinny_ctrl);
}
/*
 * Write the 16-byte tag G(s) at the current output position *c, then move
 * *c to (position + 16 - *clen).
 */
void generate_tag (
    unsigned char** c, unsigned char* s,
    unsigned long long* clen)
{
    unsigned char* tag_pos = *c;

    g8A_for_Tag_Generation(s, tag_pos);
    *c = (tag_pos + 16) - *clen;
}
/*
 * Encrypt the next message block (16 bytes, or whatever is left).
 *
 * Advances *M and *c past the processed bytes, steps the counter, and --
 * only if message bytes remain -- re-encrypts the state with the nonce.
 * Returns the number of message bytes still to be processed.
 */
unsigned long long msg_encryption (
    const unsigned char** M, unsigned char** c,
    const unsigned char* N,
    unsigned char* CNT,
    unsigned char*s, const unsigned char* k,
    unsigned char D,
    unsigned long long mlen,
    skinny_ctrl* l_skinny_ctrl)
{
    const int is_full = (mlen >= 16);
    const int step = is_full ? 16 : (int)mlen;
    const unsigned long long remaining = is_full ? (mlen - 16) : 0;

    if (is_full) {
        rho_eqov16(*M, *c, s);
    } else {
        rho_ud16(*M, *c, s, step);
    }

    *c += step;
    *M += step;
    lfsr_gf56(CNT);

    if (remaining != 0) {
        nonce_encryption(N, CNT, s, k, D, l_skinny_ctrl);
    }
    return remaining;
}
/*
 * Decrypt the next ciphertext block (16 bytes, or whatever is left).
 *
 * Advances *M and *c past the processed bytes, steps the counter, and
 * always re-encrypts the state with the nonce afterwards (also after the
 * last block). Returns the number of ciphertext bytes still to be processed.
 */
unsigned long long msg_decryption (
    unsigned char** M, const unsigned char** c,
    const unsigned char* N,
    unsigned char* CNT,
    unsigned char*s, const unsigned char* k,
    unsigned char D,
    unsigned long long clen,
    skinny_ctrl* l_skinny_ctrl)
{
    const int is_full = (clen >= 16);
    const int step = is_full ? 16 : (int)clen;
    const unsigned long long remaining = is_full ? (clen - 16) : 0;

    if (is_full) {
        irho_eqov16(*M, *c, s);
    } else {
        irho_ud16(*M, *c, s, step);
    }

    *c += step;
    *M += step;
    lfsr_gf56(CNT);
    nonce_encryption(N, CNT, s, k, D, l_skinny_ctrl);

    return remaining;
}
/*
 * Feed up to 16 message bytes through the T input of the block cipher.
 *
 * The bytes are padded into a local 16-byte block, the state is encrypted
 * with that block under counter CNT and domain byte D, the counter is
 * stepped and *M is advanced. Returns the number of message bytes left.
 */
unsigned long long ad2msg_encryption (
    const unsigned char** M,
    unsigned char* CNT,
    unsigned char*s, const unsigned char* k,
    unsigned char D,
    unsigned long long mlen,
    skinny_ctrl* l_skinny_ctrl)
{
    unsigned char T [16];
    /* A block of exactly 16 bytes is taken whole, like any shorter remainder. */
    const int step = (mlen <= 16) ? (int)mlen : 16;
    const unsigned long long remaining = (mlen <= 16) ? 0 : (mlen - 16);

    pad(*M, T, step);
    block_cipher(s, k, T, CNT, D, l_skinny_ctrl);
    lfsr_gf56(CNT);
    *M += step;

    return remaining;
}
/*
 * Process up to two 16-byte blocks of input from *A.
 *
 * The first block (full or partial) is XORed into the state and the counter
 * is stepped. If input remains, the next block (full or partial) is padded
 * and used as the T input of one block-cipher call, followed by another
 * counter step. *A is advanced past everything consumed. Returns the number
 * of input bytes still to be processed.
 */
unsigned long long ad_encryption (
    const unsigned char** A, unsigned char* s,
    const unsigned char* k, unsigned long long adlen,
    unsigned char* CNT,
    unsigned char D,
    skinny_ctrl* l_skinny_ctrl)
{
    unsigned char T [16];
    int step;

    /* First block: absorbed directly into the state. */
    if (adlen < 16) {
        step = (int)adlen;
        adlen = 0;
        rho_ad_ud16(*A, s, step);
    } else {
        step = 16;
        adlen -= 16;
        rho_ad_eqov16(*A, s);
    }
    *A += step;
    lfsr_gf56(CNT);

    if (adlen == 0) {
        return adlen;
    }

    /* Second block: supplied as T to the block cipher. */
    if (adlen < 16) {
        step = (int)adlen;
        adlen = 0;
    } else {
        step = 16;
        adlen -= 16;
    }
    pad(*A, T, step);
    *A += step;
    block_cipher(s, k, T, CNT, D, l_skinny_ctrl);
    lfsr_gf56(CNT);

    return adlen;
}
/*
 * AEAD encryption entry point.
 *
 * Pass 1 absorbs the associated data and then the plaintext into the state
 * and derives the 16-byte tag T.  Pass 2 re-seeds the state with T and
 * encrypts the plaintext.  The output is mlen ciphertext bytes followed by T.
 *
 *   c, clen   : output buffer (must hold mlen + 16 bytes) and resulting length
 *   m, mlen   : plaintext
 *   ad, adlen : associated data
 *   nsec      : unused
 *   npub      : value passed to nonce_encryption() as N
 *   k         : key
 *
 * Always returns 0.
 */
int crypto_aead_encrypt (
unsigned char* c, unsigned long long* clen,
const unsigned char* m, unsigned long long mlen,
const unsigned char* ad, unsigned long long adlen,
const unsigned char* nsec,
const unsigned char* npub,
const unsigned char* k) {
  unsigned char s[16];
  unsigned char CNT[8];
  unsigned char T[16];
  const unsigned char* N = npub;
  const unsigned char* mauth = m;   /* read cursor for the authentication pass */
  unsigned char w;
  unsigned long long xlen = mlen;
  int i;
  skinny_ctrl l_skinny_ctrl;

  (void)nsec;

  /* The first cipher call has to run the complete key schedule. */
  l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc123_12;

  /* Byte-wise clear: s is a plain byte array with no alignment guarantee, so
     it must not be written through uint32_t/uint64_t pointers. */
  for (i = 0; i < 16; i++) {
    s[i] = 0;
  }
  reset_lfsr_gf56(CNT);

  /* Domain byte for the final cipher call: one contribution from the
     associated-data length, one from the message length. */
  w = 48;
  if (adlen == 0) {
    w ^= 2;
  }
  else if (adlen % 32 == 0) {
    w ^= 8;
  }
  else if (adlen % 32 < 16) {
    w ^= 2;
  }
  else if (adlen % 32 == 16) {
    w ^= 0;
  }
  else {
    w ^= 10;
  }

  if (xlen == 0) {
    w ^= 1;
  }
  else if (xlen % 32 == 0) {
    w ^= 4;
  }
  else if (xlen % 32 < 16) {
    w ^= 1;
  }
  else if (xlen % 32 == 16) {
    w ^= 0;
  }
  else {
    w ^= 5;
  }

  /* Pass 1a: associated data. */
  if (adlen == 0) { // AD is an empty string
    lfsr_gf56(CNT);
  }
  else while (adlen > 0) {
    adlen = ad_encryption(&ad,s,k,adlen,CNT,40,&l_skinny_ctrl);
  }

  /* Pass 1b: message.  When bit 8 of w is clear the first message block goes
     in as a tweak block first. */
  if ((w & 8) == 0) {
    xlen = ad2msg_encryption (&mauth,CNT,s,k,44,xlen,&l_skinny_ctrl);
  }
  else if (mlen == 0) {
    lfsr_gf56(CNT);
  }
  while (xlen > 0) {
    xlen = ad_encryption(&mauth,s,k,xlen,CNT,44,&l_skinny_ctrl);
  }
  nonce_encryption(N,CNT,s,k,w,&l_skinny_ctrl);

  // Tag generation
  g8A(s, T);

  /* Pass 2: encryption, seeded with the tag.  From here on only the counter
     part of the tweakey changes, so the lightest cipher routine suffices. */
  l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc1_1;
  reset_lfsr_gf56(CNT);
  for (i = 0; i < 16; i++) {
    s[i] = T[i];
  }

  *clen = mlen + 16;

  if (mlen > 0) {
    nonce_encryption(N,CNT,s,k,36,&l_skinny_ctrl);
    while (mlen > 16) {
      mlen = msg_encryption(&m,&c,N,CNT,s,k,36,mlen,&l_skinny_ctrl);
    }
    rho_ud16(m, c, s, mlen);
    c = c + mlen;
  }

  // Tag Concatenation
  for (i = 0; i < 16; i++) {
    c[i] = T[i];
  }

  return 0;
}
/*
 * AEAD decryption entry point.
 *
 * The ciphertext is clen - 16 encrypted bytes followed by a 16-byte tag.
 * Because crypto_aead_encrypt() derives the tag from the plaintext and then
 * uses it to seed encryption, decryption runs the two passes in the opposite
 * order:
 *
 *   pass 1: seed the state with the received tag and decrypt into m;
 *   pass 2: absorb the associated data and the recovered plaintext exactly as
 *           crypto_aead_encrypt() does, recompute the tag and compare it with
 *           the received one.
 *
 *   m, mlen   : output buffer (must hold clen - 16 bytes) and resulting length
 *   nsec      : unused
 *   c, clen   : ciphertext including the trailing tag
 *   ad, adlen : associated data
 *   npub      : value passed to nonce_encryption() as N
 *   k         : key
 *
 * Returns 0 when the tag verifies.  Returns -1 when clen < 16 or the tag does
 * not match; in the latter case the bytes written to m are cleared.
 */
int crypto_aead_decrypt(
unsigned char *m,unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c,unsigned long long clen,
const unsigned char *ad,unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k) {
  unsigned char s[16];
  unsigned char CNT[8];
  unsigned char T[16];
  const unsigned char* N = npub;
  const unsigned char* tag;           /* received tag inside c */
  const unsigned char* mauth = m;     /* read cursor for the authentication pass */
  unsigned char* const m_start = m;   /* start of the output, for wiping on failure */
  unsigned char w;
  unsigned char diff;
  unsigned long long xlen;
  unsigned long long i;
  skinny_ctrl l_skinny_ctrl;

  (void)nsec;

  /* A ciphertext shorter than the tag cannot be valid; without this check
     clen - 16 would wrap around. */
  if (clen < 16) {
    return -1;
  }
  xlen = clen - 16;
  tag = c + xlen;
  *mlen = xlen;

  /* ---- Pass 1: decryption, seeded with the received tag ---- */

  /* The first cipher call has to run the complete key schedule. */
  l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc123_12;
  reset_lfsr_gf56(CNT);
  /* Byte-wise copy: s and the tag are plain byte buffers with no alignment
     guarantee. */
  for (i = 0; i < 16; i++) {
    s[i] = tag[i];
  }

  clen = xlen;
  if (clen > 0) {
    nonce_encryption(N,CNT,s,k,36,&l_skinny_ctrl);
    /* Only the counter part of the tweakey changes for the remaining blocks. */
    l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc1_1;
    while (clen > 16) {
      clen = msg_decryption(&m,&c,N,CNT,s,k,36,clen,&l_skinny_ctrl);
    }
    irho_ud16(m, c, s, clen);
  }

  /* ---- Pass 2: recompute the tag over AD and recovered plaintext ---- */

  /* The tweak input now changes on every call, so start again from the routine
     that rebuilds the whole key schedule (it downgrades itself afterwards). */
  l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc123_12;
  for (i = 0; i < 16; i++) {
    s[i] = 0;
  }
  reset_lfsr_gf56(CNT);

  /* Domain byte for the final cipher call: one contribution from the
     associated-data length, one from the message length. */
  w = 48;
  if (adlen == 0) {
    w ^= 2;
  }
  else if (adlen % 32 == 0) {
    w ^= 8;
  }
  else if (adlen % 32 < 16) {
    w ^= 2;
  }
  else if (adlen % 32 == 16) {
    w ^= 0;
  }
  else {
    w ^= 10;
  }

  if (xlen == 0) {
    w ^= 1;
  }
  else if (xlen % 32 == 0) {
    w ^= 4;
  }
  else if (xlen % 32 < 16) {
    w ^= 1;
  }
  else if (xlen % 32 == 16) {
    w ^= 0;
  }
  else {
    w ^= 5;
  }

  if (adlen == 0) { // AD is an empty string
    lfsr_gf56(CNT);
  }
  else while (adlen > 0) {
    adlen = ad_encryption(&ad,s,k,adlen,CNT,40,&l_skinny_ctrl);
  }

  if ((w & 8) == 0) {
    xlen = ad2msg_encryption (&mauth,CNT,s,k,44,xlen,&l_skinny_ctrl);
  }
  else if (xlen == 0) {
    /* Empty message: mirrors the "mlen == 0" step of crypto_aead_encrypt(). */
    lfsr_gf56(CNT);
  }
  while (xlen > 0) {
    xlen = ad_encryption(&mauth,s,k,xlen,CNT,44,&l_skinny_ctrl);
  }
  nonce_encryption(N,CNT,s,k,w,&l_skinny_ctrl);

  // Tag generation
  g8A(s, T);

  /* Compare all 16 bytes without an early exit so the running time does not
     reveal the position of the first mismatch. */
  diff = 0;
  for (i = 0; i < 16; i++) {
    diff |= (unsigned char)(T[i] ^ tag[i]);
  }

  if (diff != 0) {
    /* Do not hand unauthenticated plaintext back to the caller. */
    for (i = 0; i < *mlen; i++) {
      m_start[i] = 0;
    }
    return -1;
  }

  return 0;
}
/*
 * Build-time switches for the SKINNY-128-384 code that includes this header.
 *
 * ___SKINNY_LOOP       : when defined, the key-schedule routines iterate with
 *                        for-loops (TK3 reads its round constants from the RC
 *                        table) instead of being fully unrolled.
 * ___NUM_OF_ROUNDS_56  : when defined, 56 rounds are used.  It is commented
 *                        out here, so the 40-round variant is built.
 * ___ENABLE_DWORD_CAST : defined automatically for 64-bit RISC-V; selects the
 *                        code paths that operate on 64-bit words.
 */
#define ___SKINNY_LOOP
//#define ___NUM_OF_ROUNDS_56
#if (defined(__riscv_xlen) && (__riscv_xlen == 64))
#define ___ENABLE_DWORD_CAST
#endif
#include <stdint.h>
/*
 * Working context handed to the SKINNY-128-384 encryption routines.
 *
 * roundKeys               : buffer holding the loaded tweakey words and the
 *                           expanded round keys (704 bytes for 40 rounds,
 *                           960 bytes for 56 rounds).
 * func_skinny_128_384_enc : encryption routine to use for the next block.
 *                           The routines may replace it themselves:
 *                           skinny_128_384_enc123_12 switches it to
 *                           skinny_128_384_enc12_12 once it has run.
 */
typedef struct ___skinny_ctrl {
#ifdef ___NUM_OF_ROUNDS_56
unsigned char roundKeys[960]; // number of rounds : 56
#else
unsigned char roundKeys[704]; // number of rounds : 40
#endif
void (*func_skinny_128_384_enc)(unsigned char*, struct ___skinny_ctrl*, unsigned char* CNT, unsigned char* T, const unsigned char* K);
} skinny_ctrl;
/*
 * Encryption routines selectable through skinny_ctrl.func_skinny_128_384_enc.
 * They differ in how much tweakey material they reload before encrypting
 * `input` in place:
 *   enc123_12 : loads CNT, T and K, runs the TK3 and TK2 key schedules;
 *   enc12_12  : loads CNT and T, runs only the TK2 key schedule;
 *   enc1_1    : loads only CNT and reuses the stored round keys.
 */
extern void skinny_128_384_enc123_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K);
extern void skinny_128_384_enc12_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K);
extern void skinny_128_384_enc1_1 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K);
/*
 * pack_word: assemble four byte values into one 32-bit word stored in w,
 * with x0 in the least significant byte and x3 in the most significant.
 *
 * Every operand is widened to uint32_t before it is shifted.  A byte operand
 * would otherwise be promoted to (signed) int, and shifting a value >= 0x80
 * left by 24 overflows int, which is undefined behaviour.
 *
 * The expansion keeps its trailing semicolon so that existing uses, written
 * with or without their own semicolon, continue to compile.
 */
#define pack_word(x0, x1, x2, x3, w) \
(w) = ((uint32_t)(x3) << 24) ^ \
((uint32_t)(x2) << 16) ^ \
((uint32_t)(x1) << 8) ^ \
((uint32_t)(x0));
/*
 * unpack_word: split the word w into four bytes, x0 receiving bits 0-7,
 * x1 bits 8-15, x2 bits 16-23 and x3 everything from bit 24 upwards
 * (x3 is not masked).  The four assignments run in this order, which matters
 * if one of the outputs is the same object as w.
 */
#define unpack_word(x0, x1, x2, x3, w) \
x0 = ((w) & 0xff); \
x1 = (((w) >> 8) & 0xff); \
x2 = (((w) >> 16) & 0xff); \
x3 = ((w) >> 24);
/*
 * PERMUTATION(): byte permutation applied to an 8-byte half of a tweakey.
 * Numbering the bytes 7..0 from most to least significant, the result holds
 * bytes (5 7 2 3 6 0 4 1) in that order.
 *
 * 64-bit variant: permutes `dw` in place, using `dt0` and `dt1` as scratch.
 * 32-bit variant: permutes the pair `w0` (bytes 3..0) / `w1` (bytes 7..4) in
 *                 place, using `t0`, `t1` and `t2` as scratch.
 * All of these names must be declared by the code that expands the macro.
 */
#ifdef ___ENABLE_DWORD_CAST
#define PERMUTATION() \
/* permutation */ \
\
/* 7 6 5 4 3 2 1 0 */ \
/* 5 7 2 3 6 0 4 1 */ \
\
/* dw (7 6 5 4 3 2 1 0) */ \
\
/* dw (5 7 2 3 6 0 4 1) */ \
\
dt0 = dw >> 24; /* - - - 7 6 5 4 3 */ \
dt0 = dt0 & 0x00000000ff00ff00; /* - - - - 6 - 4 - */ \
\
dt1 = dw << 16; /* 5 4 3 2 1 0 - - */ \
dt1 = dt1 & 0xff00000000ff0000; /* 5 - - - - 0 - - */ \
dt0 = dt0 ^ dt1; /* 5 - - - 6 0 4 - */ \
\
dt1 = dw >> 8; /* - 7 6 5 4 3 2 1 */ \
dt1 = dt1 & 0x00ff0000000000ff; /* - 7 - - - - - 1 */ \
dt0 = dt0 ^ dt1; /* 5 7 - - 6 0 4 1 */ \
\
dt1 = dw << 8; /* 6 5 4 3 2 1 0 - */ \
dt1 = dt1 & 0x000000ff00000000; /* - - - 3 - - - - */ \
dt0 = dt0 ^ dt1; /* 5 7 - 3 6 0 4 1 */ \
\
dt1 = dw << 24; /* 4 3 2 1 0 - - - */ \
dw = dt1 & 0x0000ff0000000000; /* - - 2 - - - - - */ \
dw = dw ^ dt0; /* 5 7 2 3 6 0 4 1 */
#else
#define PERMUTATION() \
/* permutation */ \
\
/* 7 6 5 4 3 2 1 0 */ \
/* 5 7 2 3 6 0 4 1 */ \
\
/* w0 (3 2 1 0) */ \
/* w1 (7 6 5 4) */ \
\
/* w0 (6 0 4 1) */ \
/* w1 (5 7 2 3) */ \
\
t0 = w1 << 8; /* 6 5 4 - */ \
t0 = t0 & 0xff00ff00; /* 6 - 4 - */ \
\
t1 = w1 << 16; /* 5 4 - - */ \
t1 = t1 & 0xff000000; /* 5 - - - */ \
\
t2 = w1 & 0xff000000; /* 7 - - - */ \
t2 = t2 >> 8; /* - 7 - - */ \
t1 = t1 ^ t2; /* 5 7 - - */ \
\
t2 = w0 & 0xff000000; /* 3 - - - */ \
t2 = t2 >> 24; /* - - - 3 */ \
t1 = t1 ^ t2; /* 5 7 - 3 */ \
\
w1 = w0 >> 8; /* - 3 2 1 */ \
w1 = w1 & 0x0000ff00; /* - - 2 - */ \
w1 = w1 ^ t1; /* 5 7 2 3 */ \
\
t2 = w0 & 0x0000ff00; /* - - 1 - */ \
t2 = t2 >> 8; /* - - - 1 */ \
t0 = t0 ^ t2; /* 6 - 4 1 */ \
\
w0 = w0 << 16; /* 1 0 - - */ \
w0 = w0 & 0x00ff0000; /* - 0 - - */ \
w0 = w0 ^ t0; /* 6 0 4 1 */
#endif
/******************************************************************************
* Copyright (c) 2020, NEC Corporation.
*
* THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND.
*
*****************************************************************************/
/*
* SKINNY-128-384
*
* load * AC(c0 c1) ^ TK3
* calc AC(c0 c1) ^ TK2 -> store
* ART(TK2)
*
* number of rounds : 40 or 56
*/
#include "skinny.h"
#ifdef ___ENABLE_DWORD_CAST
/*
 * PERMUTATION_TK2() (64-bit variant): one TK2 key-schedule step.
 * Permutes `dw` with PERMUTATION(), applies the TK2 LFSR to each of its eight
 * bytes, XORs the result with the word at `tk3` and stores it at `tk2`.
 * Both pointers then advance by two 64-bit words (16 bytes).
 * Expects `dw`, `dt0`, `dt1`, `tk2` and `tk3` at the expansion site.
 */
#define PERMUTATION_TK2() \
\
/* permutation */ \
\
PERMUTATION() \
\
/* LFSR(for TK2) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x6 x5 x4 x3 x2 x1 x0 x7^x5) */ \
dw = ((dw << 1) & 0xfefefefefefefefe) ^ \
(((dw >> 7) ^ (dw >> 5)) & 0x0101010101010101); \
\
/* Load TK3 */ \
/* TK2^TK3^AC(c0 c1) */ \
/* store */ \
*tk2 = dw ^ *tk3; \
tk2 += 2; \
tk3 += 2;
#ifndef ___SKINNY_LOOP
void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys)
{
uint64_t* tk2; // used in MACRO
uint64_t* tk3; // used in MACRO
uint64_t dt0; // used in MACRO
uint64_t dt1; // used in MACRO
uint64_t dw;
// odd
// load master key
// load master key
dw = *(uint64_t*)&roundKeys[16];
tk2 = (uint64_t*)&roundKeys[64];
#ifndef ___NUM_OF_ROUNDS_56
tk3 = (uint64_t*)&roundKeys[384];
#else
tk3 = (uint64_t*)&roundKeys[512];
#endif
// 1st round
*tk2 = dw ^ *tk3;
tk2 += 2;
tk3 += 2;
// 3rd,5th, ... ,37th,39th round
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
#ifdef ___NUM_OF_ROUNDS_56
// 41th,43th, ... ,51th,53th round
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
#endif
// even
// load master key
dw = *(uint64_t*)&roundKeys[24];
tk2 = (uint64_t*)&roundKeys[72];
#ifndef ___NUM_OF_ROUNDS_56
tk3 = (uint64_t*)&roundKeys[392];
#else
tk3 = (uint64_t*)&roundKeys[520];
#endif
// 2nd,4th, ... ,54th,56th round
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
#ifdef ___NUM_OF_ROUNDS_56
// 42nd,44th, ... ,54th,56th round
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
#endif
}
#else /* ___SKINNY_LOOP */
/*
 * TK2 key schedule, 64-bit words, loop form.
 *
 * Combines the TK2 master value (roundKeys[16..31]) with the prepared TK3
 * round values and writes one 8-byte result per round into the area starting
 * at roundKeys[64].  The first master half produces the odd rounds, the
 * second half the even rounds.
 */
void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys)
{
#ifdef ___NUM_OF_ROUNDS_56
  enum { TK3_OFFSET = 512, ODD_STEPS = 27, EVEN_STEPS = 28 };
#else
  enum { TK3_OFFSET = 384, ODD_STEPS = 19, EVEN_STEPS = 20 };
#endif
  uint64_t *tk2;   /* advanced by PERMUTATION_TK2 */
  uint64_t *tk3;   /* advanced by PERMUTATION_TK2 */
  uint64_t dt0;    /* scratch for PERMUTATION */
  uint64_t dt1;    /* scratch for PERMUTATION */
  uint64_t dw;
  int step;

  /* Odd rounds: first half of the master value. */
  dw  = *(uint64_t*)&roundKeys[16];
  tk2 = (uint64_t*)&roundKeys[64];
  tk3 = (uint64_t*)&roundKeys[TK3_OFFSET];

  /* Round 1 uses the master value unchanged. */
  tk2[0] = dw ^ tk3[0];
  tk2 += 2;
  tk3 += 2;

  /* Rounds 3, 5, ... */
  for (step = ODD_STEPS; step > 0; step--) {
    PERMUTATION_TK2();
  }

  /* Even rounds: second half of the master value. */
  dw  = *(uint64_t*)&roundKeys[24];
  tk2 = (uint64_t*)&roundKeys[72];
  tk3 = (uint64_t*)&roundKeys[TK3_OFFSET + 8];

  /* Rounds 2, 4, ... */
  for (step = EVEN_STEPS; step > 0; step--) {
    PERMUTATION_TK2();
  }
}
#endif /* ___SKINNY_LOOP */
#else /* ___ENABLE_DWORD_CAST */
/*
 * PERMUTATION_TK2() (32-bit variant): one TK2 key-schedule step.
 * Permutes the pair `w0`/`w1` with PERMUTATION(), applies the TK2 LFSR to
 * each byte of both words, XORs them with the two words at `tk3` and stores
 * the results at `tk2`.  Both pointers advance by four 32-bit words in total
 * (16 bytes).
 * Expects `w0`, `w1`, `t0`, `t1`, `t2`, `tk2` and `tk3` at the expansion site.
 */
#define PERMUTATION_TK2() \
\
/* permutation */ \
\
PERMUTATION() \
\
/* LFSR(for TK2) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x6 x5 x4 x3 x2 x1 x0 x7^x5) */ \
w0 = ((w0 << 1) & 0xfefefefe) ^ \
(((w0 >> 7) ^ (w0 >> 5)) & 0x01010101); \
w1 = ((w1 << 1) & 0xfefefefe) ^ \
(((w1 >> 7) ^ (w1 >> 5)) & 0x01010101); \
\
/* Load TK3 */ \
/* TK2^TK3^AC(c0 c1) */ \
/* store */ \
*tk2++ = w0 ^ *tk3++; \
*tk2++ = w1 ^ *tk3++; \
tk2 += 2; \
tk3 += 2;
#ifndef ___SKINNY_LOOP
void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys)
{
uint32_t* tk2; // used in MACRO
uint32_t* tk3; // used in MACRO
uint32_t t0; // used in MACRO
uint32_t t1; // used in MACRO
uint32_t t2; // used in MACRO
uint32_t w0;
uint32_t w1;
// odd
// load master key
w0 = *(uint32_t*)&roundKeys[16];
w1 = *(uint32_t*)&roundKeys[20];
tk2 = (uint32_t*)&roundKeys[64];
#ifndef ___NUM_OF_ROUNDS_56
tk3 = (uint32_t*)&roundKeys[384];
#else
tk3 = (uint32_t*)&roundKeys[512];
#endif
// 1st round
*tk2++ = w0 ^ *tk3++;
*tk2++ = w1 ^ *tk3++;
tk2 += 2;
tk3 += 2;
// 3rd,5th, ... ,37th,39th round
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
#ifdef ___NUM_OF_ROUNDS_56
// 41th,43th, ... ,51th,53th round
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
#endif
// even
// load master key
w0 = *(uint32_t*)&roundKeys[24];
w1 = *(uint32_t*)&roundKeys[28];
tk2 = (uint32_t*)&roundKeys[72];
#ifndef ___NUM_OF_ROUNDS_56
tk3 = (uint32_t*)&roundKeys[392];
#else
tk3 = (uint32_t*)&roundKeys[520];
#endif
// 2nd,4th, ... ,54th,56th round
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
#ifdef ___NUM_OF_ROUNDS_56
// 42nd,44th, ... ,54th,56th round
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
#endif
}
#else /* ___SKINNY_LOOP */
/*
 * TK2 key schedule, 32-bit words, loop form.
 *
 * Combines the TK2 master value (roundKeys[16..31]) with the prepared TK3
 * round values and writes one 8-byte result per round into the area starting
 * at roundKeys[64].  The first master half produces the odd rounds, the
 * second half the even rounds.
 */
void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys)
{
#ifdef ___NUM_OF_ROUNDS_56
  enum { TK3_OFFSET = 512, ODD_STEPS = 27, EVEN_STEPS = 28 };
#else
  enum { TK3_OFFSET = 384, ODD_STEPS = 19, EVEN_STEPS = 20 };
#endif
  uint32_t *tk2;   /* advanced by PERMUTATION_TK2 */
  uint32_t *tk3;   /* advanced by PERMUTATION_TK2 */
  uint32_t t0;     /* scratch for PERMUTATION */
  uint32_t t1;     /* scratch for PERMUTATION */
  uint32_t t2;     /* scratch for PERMUTATION */
  uint32_t w0;
  uint32_t w1;
  int step;

  /* Odd rounds: first half of the master value. */
  w0  = *(uint32_t*)&roundKeys[16];
  w1  = *(uint32_t*)&roundKeys[20];
  tk2 = (uint32_t*)&roundKeys[64];
  tk3 = (uint32_t*)&roundKeys[TK3_OFFSET];

  /* Round 1 uses the master value unchanged. */
  tk2[0] = w0 ^ tk3[0];
  tk2[1] = w1 ^ tk3[1];
  tk2 += 4;
  tk3 += 4;

  /* Rounds 3, 5, ... */
  for (step = ODD_STEPS; step > 0; step--) {
    PERMUTATION_TK2();
  }

  /* Even rounds: second half of the master value. */
  w0  = *(uint32_t*)&roundKeys[24];
  w1  = *(uint32_t*)&roundKeys[28];
  tk2 = (uint32_t*)&roundKeys[72];
  tk3 = (uint32_t*)&roundKeys[TK3_OFFSET + 8];

  /* Rounds 2, 4, ... */
  for (step = EVEN_STEPS; step > 0; step--) {
    PERMUTATION_TK2();
  }
}
#endif /* ___SKINNY_LOOP */
#endif /* ___ENABLE_DWORD_CAST */
/******************************************************************************
* Copyright (c) 2020, NEC Corporation.
*
* THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND.
*
*****************************************************************************/
/*
* SKINNY-128-384
*
* AC(c0 c1) ^ TK3 -> store
* ART(TK3)
*
* number of rounds : 40 or 56
*/
#include "skinny.h"
#ifdef ___ENABLE_DWORD_CAST
/*
 * PERMUTATION_TK3(c0Val, c1Val) (64-bit variant): one TK3 key-schedule step.
 * Permutes `dw` with PERMUTATION(), applies the TK3 LFSR to each of its eight
 * bytes, then stores the result XORed with the round constants at `tk3`:
 * c0Val goes into the lowest byte, c1Val is shifted up by 40 bits (byte 5).
 * `tk3` then advances by two 64-bit words (16 bytes).
 * Expects `dw`, `dt0`, `dt1` and `tk3` at the expansion site.
 * NOTE(review): the arguments are used unparenthesised, so pass only simple
 * constants or identifiers.
 */
#define PERMUTATION_TK3(c0Val, c1Val) \
\
/* permutation */ \
\
PERMUTATION() \
\
/* LFSR(for TK3) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x0^x6 x7 x6 x5 x4 x3 x2 x1) */ \
dw = ((dw >> 1) & 0x7f7f7f7f7f7f7f7f) ^ \
(((dw << 7) ^ (dw << 1)) & 0x8080808080808080); \
\
/* K3^AC(c0 c1) */ \
/* store */ \
dt0 = dw ^ c0Val; \
*tk3 = dt0 ^ ((uint64_t)c1Val << 40); \
tk3 += 2;
#ifndef ___SKINNY_LOOP
/*
 * TK3 key schedule, 64-bit words, fully unrolled.
 *
 * Reads the two 8-byte halves of the TK3 master value from roundKeys[32] and
 * roundKeys[40] and writes, for every round, the scheduled value XORed with
 * that round's constants (c0, c1) into the area starting at roundKeys[384]
 * (40 rounds) or roundKeys[512] (56 rounds), 8 bytes per round.  The first
 * half drives the odd rounds, the second half the even rounds; the two
 * sequences are interleaved in memory.  The constants are written out as
 * literals, one macro call per round.
 */
void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys)
{
uint64_t *tk3;
uint64_t dt0; // used in MACRO
uint64_t dt1; // used in MACRO
uint64_t dw;
// odd rounds
// load first half of the master key
dw = *(uint64_t*)&roundKeys[32];
#ifndef ___NUM_OF_ROUNDS_56
tk3 = (uint64_t*)&roundKeys[384];
#else
tk3 = (uint64_t*)&roundKeys[512];
#endif
// 1st round: master value XOR constant c0 = 0x01 (no permutation, no LFSR)
*tk3++ = dw ^ 0x01;
tk3 += 1;
// 3rd,5th, ... ,37th,39th round
PERMUTATION_TK3(0x7, 0x0);
PERMUTATION_TK3(0xf, 0x1);
PERMUTATION_TK3(0xd, 0x3);
PERMUTATION_TK3(0x7, 0x3);
PERMUTATION_TK3(0xe, 0x1);
PERMUTATION_TK3(0x9, 0x3);
PERMUTATION_TK3(0x7, 0x2);
PERMUTATION_TK3(0xd, 0x1);
PERMUTATION_TK3(0x5, 0x3);
PERMUTATION_TK3(0x6, 0x1);
PERMUTATION_TK3(0x8, 0x1);
PERMUTATION_TK3(0x1, 0x2);
PERMUTATION_TK3(0x5, 0x0);
PERMUTATION_TK3(0x7, 0x1);
PERMUTATION_TK3(0xc, 0x1);
PERMUTATION_TK3(0x1, 0x3);
PERMUTATION_TK3(0x6, 0x0);
PERMUTATION_TK3(0xb, 0x1);
PERMUTATION_TK3(0xd, 0x2);
#ifdef ___NUM_OF_ROUNDS_56
// 41st,43rd, ... ,53rd,55th round
PERMUTATION_TK3(0x4, 0x3);
PERMUTATION_TK3(0x2, 0x1);
PERMUTATION_TK3(0x8, 0x0);
PERMUTATION_TK3(0x2, 0x2);
PERMUTATION_TK3(0x9, 0x0);
PERMUTATION_TK3(0x6, 0x2);
PERMUTATION_TK3(0x9, 0x1);
PERMUTATION_TK3(0x5, 0x2);
#endif
// even rounds
// load second half of the master key
dw = *(uint64_t*)&roundKeys[40];
#ifndef ___NUM_OF_ROUNDS_56
tk3 = (uint64_t*)&roundKeys[392];
#else
tk3 = (uint64_t*)&roundKeys[520];
#endif
// 2nd,4th, ... ,38th,40th round
PERMUTATION_TK3(0x3, 0x0);
PERMUTATION_TK3(0xf, 0x0);
PERMUTATION_TK3(0xe, 0x3);
PERMUTATION_TK3(0xb, 0x3);
PERMUTATION_TK3(0xf, 0x2);
PERMUTATION_TK3(0xc, 0x3);
PERMUTATION_TK3(0x3, 0x3);
PERMUTATION_TK3(0xe, 0x0);
PERMUTATION_TK3(0xa, 0x3);
PERMUTATION_TK3(0xb, 0x2);
PERMUTATION_TK3(0xc, 0x2);
PERMUTATION_TK3(0x0, 0x3);
PERMUTATION_TK3(0x2, 0x0);
PERMUTATION_TK3(0xb, 0x0);
PERMUTATION_TK3(0xe, 0x2);
PERMUTATION_TK3(0x8, 0x3);
PERMUTATION_TK3(0x3, 0x2);
PERMUTATION_TK3(0xd, 0x0);
PERMUTATION_TK3(0x6, 0x3);
PERMUTATION_TK3(0xa, 0x1);
#ifdef ___NUM_OF_ROUNDS_56
// 42nd,44th, ... ,54th,56th round
PERMUTATION_TK3(0x9, 0x2);
PERMUTATION_TK3(0x4, 0x2);
PERMUTATION_TK3(0x1, 0x1);
PERMUTATION_TK3(0x4, 0x0);
PERMUTATION_TK3(0x3, 0x1);
PERMUTATION_TK3(0xc, 0x0);
PERMUTATION_TK3(0x2, 0x3);
PERMUTATION_TK3(0xa, 0x0);
#endif
}
#else /* ___SKINNY_LOOP */
/*
 * TK3 key schedule, 64-bit words, loop form.
 *
 * Expands the TK3 master value (roundKeys[32..47]) into one 8-byte value per
 * round, XORed with that round's constants, in the area starting at
 * roundKeys[384] (40 rounds) or roundKeys[512] (56 rounds).
 *
 * pRC points to the round-constant table: two bytes (c0, c1) per round,
 * starting with round 1.
 */
void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys, unsigned char *pRC)
{
#ifdef ___NUM_OF_ROUNDS_56
  enum { TK3_OFFSET = 512, ODD_STEPS = 27, EVEN_STEPS = 28 };
#else
  enum { TK3_OFFSET = 384, ODD_STEPS = 19, EVEN_STEPS = 20 };
#endif
  uint64_t *tk3;   /* advanced by PERMUTATION_TK3 */
  uint64_t dt0;    /* scratch for PERMUTATION / PERMUTATION_TK3 */
  uint64_t dt1;    /* scratch for PERMUTATION */
  uint64_t dw;
  uint64_t c0;
  uint64_t c1;
  const unsigned char *rc;
  int step;

  /* Odd rounds: first half of the master value. */
  dw  = *(uint64_t*)&roundKeys[32];
  tk3 = (uint64_t*)&roundKeys[TK3_OFFSET];

  /* Round 1: constant c0 = 0x01, no permutation. */
  tk3[0] = dw ^ 0x01;
  tk3 += 2;

  /* Rounds 3, 5, ...: constants start at table offset 4, stride 4 bytes. */
  rc = pRC + 4;
  for (step = ODD_STEPS; step > 0; step--) {
    c0 = rc[0];
    c1 = rc[1];
    rc += 4;
    PERMUTATION_TK3(c0, c1);
  }

  /* Even rounds: second half of the master value. */
  dw  = *(uint64_t*)&roundKeys[40];
  tk3 = (uint64_t*)&roundKeys[TK3_OFFSET + 8];

  /* Rounds 2, 4, ...: constants start at table offset 2, stride 4 bytes. */
  rc = pRC + 2;
  for (step = EVEN_STEPS; step > 0; step--) {
    c0 = rc[0];
    c1 = rc[1];
    rc += 4;
    PERMUTATION_TK3(c0, c1);
  }
}
#endif /* ___SKINNY_LOOP */
#else /* ___ENABLE_DWORD_CAST */
/*
 * PERMUTATION_TK3(c0Val, c1Val) (32-bit variant): one TK3 key-schedule step.
 * Permutes the pair `w0`/`w1` with PERMUTATION(), applies the TK3 LFSR to
 * each byte of both words, then stores them at `tk3` XORed with the round
 * constants: c0Val into the lowest byte of the first word, c1Val shifted up
 * by 8 bits in the second word.  `tk3` advances by four 32-bit words in total
 * (16 bytes).
 * Expects `w0`, `w1`, `t0`, `t1`, `t2` and `tk3` at the expansion site.
 * NOTE(review): the arguments are used unparenthesised, so pass only simple
 * constants or identifiers.
 */
#define PERMUTATION_TK3(c0Val, c1Val) \
\
/* permutation */ \
\
PERMUTATION() \
\
/* LFSR(for TK3) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x0^x6 x7 x6 x5 x4 x3 x2 x1) */ \
w0 = ((w0 >> 1) & 0x7f7f7f7f) ^ \
(((w0 << 7) ^ (w0 << 1)) & 0x80808080); \
w1 = ((w1 >> 1) & 0x7f7f7f7f) ^ \
(((w1 << 7) ^ (w1 << 1)) & 0x80808080); \
\
/* K3^AC(c0 c1) */ \
/* store */ \
*tk3++ = w0 ^ c0Val; \
*tk3++ = w1 ^ ((uint32_t)c1Val << 8); \
tk3 += 2;
#ifndef ___SKINNY_LOOP
/*
 * TK3 key schedule, 32-bit words, fully unrolled.
 *
 * Reads the two 8-byte halves of the TK3 master value from roundKeys[32..39]
 * and roundKeys[40..47] and writes, for every round, the scheduled value
 * XORed with that round's constants (c0, c1) into the area starting at
 * roundKeys[384] (40 rounds) or roundKeys[512] (56 rounds), 8 bytes per
 * round.  The first half drives the odd rounds, the second half the even
 * rounds; the two sequences are interleaved in memory.  The constants are
 * written out as literals, one macro call per round.
 */
void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys)
{
uint32_t *tk3;
uint32_t t0; // used in MACRO
uint32_t t1; // used in MACRO
uint32_t t2; // used in MACRO
uint32_t w0;
uint32_t w1;
// odd rounds
// load first half of the master key
w0 = *(uint32_t*)&roundKeys[32];
w1 = *(uint32_t*)&roundKeys[36];
#ifndef ___NUM_OF_ROUNDS_56
tk3 = (uint32_t*)&roundKeys[384];
#else
tk3 = (uint32_t*)&roundKeys[512];
#endif
// 1st round: master value XOR constant c0 = 0x01 (no permutation, no LFSR)
*tk3++ = w0 ^ 0x01;
*tk3++ = w1;
tk3 += 2;
// 3rd,5th, ... ,37th,39th round
PERMUTATION_TK3(0x7, 0x0);
PERMUTATION_TK3(0xf, 0x1);
PERMUTATION_TK3(0xd, 0x3);
PERMUTATION_TK3(0x7, 0x3);
PERMUTATION_TK3(0xe, 0x1);
PERMUTATION_TK3(0x9, 0x3);
PERMUTATION_TK3(0x7, 0x2);
PERMUTATION_TK3(0xd, 0x1);
PERMUTATION_TK3(0x5, 0x3);
PERMUTATION_TK3(0x6, 0x1);
PERMUTATION_TK3(0x8, 0x1);
PERMUTATION_TK3(0x1, 0x2);
PERMUTATION_TK3(0x5, 0x0);
PERMUTATION_TK3(0x7, 0x1);
PERMUTATION_TK3(0xc, 0x1);
PERMUTATION_TK3(0x1, 0x3);
PERMUTATION_TK3(0x6, 0x0);
PERMUTATION_TK3(0xb, 0x1);
PERMUTATION_TK3(0xd, 0x2);
#ifdef ___NUM_OF_ROUNDS_56
// 41st,43rd, ... ,53rd,55th round
PERMUTATION_TK3(0x4, 0x3);
PERMUTATION_TK3(0x2, 0x1);
PERMUTATION_TK3(0x8, 0x0);
PERMUTATION_TK3(0x2, 0x2);
PERMUTATION_TK3(0x9, 0x0);
PERMUTATION_TK3(0x6, 0x2);
PERMUTATION_TK3(0x9, 0x1);
PERMUTATION_TK3(0x5, 0x2);
#endif
// even rounds
// load second half of the master key
w0 = *(uint32_t*)&roundKeys[40];
w1 = *(uint32_t*)&roundKeys[44];
#ifndef ___NUM_OF_ROUNDS_56
tk3 = (uint32_t*)&roundKeys[392];
#else
tk3 = (uint32_t*)&roundKeys[520];
#endif
// 2nd,4th, ... ,38th,40th round
PERMUTATION_TK3(0x3, 0x0);
PERMUTATION_TK3(0xf, 0x0);
PERMUTATION_TK3(0xe, 0x3);
PERMUTATION_TK3(0xb, 0x3);
PERMUTATION_TK3(0xf, 0x2);
PERMUTATION_TK3(0xc, 0x3);
PERMUTATION_TK3(0x3, 0x3);
PERMUTATION_TK3(0xe, 0x0);
PERMUTATION_TK3(0xa, 0x3);
PERMUTATION_TK3(0xb, 0x2);
PERMUTATION_TK3(0xc, 0x2);
PERMUTATION_TK3(0x0, 0x3);
PERMUTATION_TK3(0x2, 0x0);
PERMUTATION_TK3(0xb, 0x0);
PERMUTATION_TK3(0xe, 0x2);
PERMUTATION_TK3(0x8, 0x3);
PERMUTATION_TK3(0x3, 0x2);
PERMUTATION_TK3(0xd, 0x0);
PERMUTATION_TK3(0x6, 0x3);
PERMUTATION_TK3(0xa, 0x1);
#ifdef ___NUM_OF_ROUNDS_56
// 42nd,44th, ... ,54th,56th round
PERMUTATION_TK3(0x9, 0x2);
PERMUTATION_TK3(0x4, 0x2);
PERMUTATION_TK3(0x1, 0x1);
PERMUTATION_TK3(0x4, 0x0);
PERMUTATION_TK3(0x3, 0x1);
PERMUTATION_TK3(0xc, 0x0);
PERMUTATION_TK3(0x2, 0x3);
PERMUTATION_TK3(0xa, 0x0);
#endif
}
#else /* ___SKINNY_LOOP */
/*
 * TK3 key schedule, 32-bit words, loop form.
 *
 * Expands the TK3 master value (roundKeys[32..47]) into one 8-byte value per
 * round, XORed with that round's constants, in the area starting at
 * roundKeys[384] (40 rounds) or roundKeys[512] (56 rounds).
 *
 * pRC points to the round-constant table: two bytes (c0, c1) per round,
 * starting with round 1.
 */
void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys, unsigned char *pRC)
{
#ifdef ___NUM_OF_ROUNDS_56
  enum { TK3_OFFSET = 512, ODD_STEPS = 27, EVEN_STEPS = 28 };
#else
  enum { TK3_OFFSET = 384, ODD_STEPS = 19, EVEN_STEPS = 20 };
#endif
  uint32_t *tk3;   /* advanced by PERMUTATION_TK3 */
  uint32_t t0;     /* scratch for PERMUTATION */
  uint32_t t1;     /* scratch for PERMUTATION */
  uint32_t t2;     /* scratch for PERMUTATION */
  uint32_t w0;
  uint32_t w1;
  uint32_t c0;
  uint32_t c1;
  const unsigned char *rc;
  int step;

  /* Odd rounds: first half of the master value. */
  w0  = *(uint32_t*)&roundKeys[32];
  w1  = *(uint32_t*)&roundKeys[36];
  tk3 = (uint32_t*)&roundKeys[TK3_OFFSET];

  /* Round 1: constant c0 = 0x01, no permutation. */
  tk3[0] = w0 ^ 0x01;
  tk3[1] = w1;
  tk3 += 4;

  /* Rounds 3, 5, ...: constants start at table offset 4, stride 4 bytes. */
  rc = pRC + 4;
  for (step = ODD_STEPS; step > 0; step--) {
    c0 = rc[0];
    c1 = rc[1];
    rc += 4;
    PERMUTATION_TK3(c0, c1);
  }

  /* Even rounds: second half of the master value. */
  w0  = *(uint32_t*)&roundKeys[40];
  w1  = *(uint32_t*)&roundKeys[44];
  tk3 = (uint32_t*)&roundKeys[TK3_OFFSET + 8];

  /* Rounds 2, 4, ...: constants start at table offset 2, stride 4 bytes. */
  rc = pRC + 2;
  for (step = EVEN_STEPS; step > 0; step--) {
    c0 = rc[0];
    c1 = rc[1];
    rc += 4;
    PERMUTATION_TK3(c0, c1);
  }
}
#endif /* ___SKINNY_LOOP */
#endif /* ___ENABLE_DWORD_CAST */
/******************************************************************************
* Copyright (c) 2020, NEC Corporation.
*
* THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND.
*
*****************************************************************************/
/*
* SKINNY-128-384
*
* ART(TK1) -> store
* load AC(c0 c1) ^ TK3 ^ TK2
* load TK1
* calc AC(c0 c1) ^ TK3 ^ TK2 ^ TK1 -> use at (AC->ART)
* SC->SR->(AC->ART)->MC
*
* number of rounds : 40 or 56
*/
#include "skinny.h"
/*
* S-BOX
*/
/* 256-entry byte substitution table, indexed by the input byte.  It is passed
   to Encrypt() as `sbox`. */
unsigned char SBOX[]
= {
// Original
0x65, 0x4c, 0x6a, 0x42, 0x4b, 0x63, 0x43, 0x6b, 0x55, 0x75, 0x5a, 0x7a, 0x53, 0x73, 0x5b, 0x7b,
0x35, 0x8c, 0x3a, 0x81, 0x89, 0x33, 0x80, 0x3b, 0x95, 0x25, 0x98, 0x2a, 0x90, 0x23, 0x99, 0x2b,
0xe5, 0xcc, 0xe8, 0xc1, 0xc9, 0xe0, 0xc0, 0xe9, 0xd5, 0xf5, 0xd8, 0xf8, 0xd0, 0xf0, 0xd9, 0xf9,
0xa5, 0x1c, 0xa8, 0x12, 0x1b, 0xa0, 0x13, 0xa9, 0x05, 0xb5, 0x0a, 0xb8, 0x03, 0xb0, 0x0b, 0xb9,
0x32, 0x88, 0x3c, 0x85, 0x8d, 0x34, 0x84, 0x3d, 0x91, 0x22, 0x9c, 0x2c, 0x94, 0x24, 0x9d, 0x2d,
0x62, 0x4a, 0x6c, 0x45, 0x4d, 0x64, 0x44, 0x6d, 0x52, 0x72, 0x5c, 0x7c, 0x54, 0x74, 0x5d, 0x7d,
0xa1, 0x1a, 0xac, 0x15, 0x1d, 0xa4, 0x14, 0xad, 0x02, 0xb1, 0x0c, 0xbc, 0x04, 0xb4, 0x0d, 0xbd,
0xe1, 0xc8, 0xec, 0xc5, 0xcd, 0xe4, 0xc4, 0xed, 0xd1, 0xf1, 0xdc, 0xfc, 0xd4, 0xf4, 0xdd, 0xfd,
0x36, 0x8e, 0x38, 0x82, 0x8b, 0x30, 0x83, 0x39, 0x96, 0x26, 0x9a, 0x28, 0x93, 0x20, 0x9b, 0x29,
0x66, 0x4e, 0x68, 0x41, 0x49, 0x60, 0x40, 0x69, 0x56, 0x76, 0x58, 0x78, 0x50, 0x70, 0x59, 0x79,
0xa6, 0x1e, 0xaa, 0x11, 0x19, 0xa3, 0x10, 0xab, 0x06, 0xb6, 0x08, 0xba, 0x00, 0xb3, 0x09, 0xbb,
0xe6, 0xce, 0xea, 0xc2, 0xcb, 0xe3, 0xc3, 0xeb, 0xd6, 0xf6, 0xda, 0xfa, 0xd3, 0xf3, 0xdb, 0xfb,
0x31, 0x8a, 0x3e, 0x86, 0x8f, 0x37, 0x87, 0x3f, 0x92, 0x21, 0x9e, 0x2e, 0x97, 0x27, 0x9f, 0x2f,
0x61, 0x48, 0x6e, 0x46, 0x4f, 0x67, 0x47, 0x6f, 0x51, 0x71, 0x5e, 0x7e, 0x57, 0x77, 0x5f, 0x7f,
0xa2, 0x18, 0xae, 0x16, 0x1f, 0xa7, 0x17, 0xaf, 0x01, 0xb2, 0x0e, 0xbe, 0x07, 0xb7, 0x0f, 0xbf,
0xe2, 0xca, 0xee, 0xc6, 0xcf, 0xe7, 0xc7, 0xef, 0xd2, 0xf2, 0xde, 0xfe, 0xd7, 0xf7, 0xdf, 0xff,
};
/*
* S-BOX ^ AC(c2)
*/
/* Copy of SBOX with every entry XORed with the constant c2 = 0x02, so that
   one lookup performs the substitution and adds c2.  It is passed to
   Encrypt() as `sbox2`. */
unsigned char SBOX2[]
= { // Original ^ c2(0x02)
0x67, 0x4e, 0x68, 0x40, 0x49, 0x61, 0x41, 0x69, 0x57, 0x77, 0x58, 0x78, 0x51, 0x71, 0x59, 0x79,
0x37, 0x8e, 0x38, 0x83, 0x8b, 0x31, 0x82, 0x39, 0x97, 0x27, 0x9a, 0x28, 0x92, 0x21, 0x9b, 0x29,
0xe7, 0xce, 0xea, 0xc3, 0xcb, 0xe2, 0xc2, 0xeb, 0xd7, 0xf7, 0xda, 0xfa, 0xd2, 0xf2, 0xdb, 0xfb,
0xa7, 0x1e, 0xaa, 0x10, 0x19, 0xa2, 0x11, 0xab, 0x07, 0xb7, 0x08, 0xba, 0x01, 0xb2, 0x09, 0xbb,
0x30, 0x8a, 0x3e, 0x87, 0x8f, 0x36, 0x86, 0x3f, 0x93, 0x20, 0x9e, 0x2e, 0x96, 0x26, 0x9f, 0x2f,
0x60, 0x48, 0x6e, 0x47, 0x4f, 0x66, 0x46, 0x6f, 0x50, 0x70, 0x5e, 0x7e, 0x56, 0x76, 0x5f, 0x7f,
0xa3, 0x18, 0xae, 0x17, 0x1f, 0xa6, 0x16, 0xaf, 0x00, 0xb3, 0x0e, 0xbe, 0x06, 0xb6, 0x0f, 0xbf,
0xe3, 0xca, 0xee, 0xc7, 0xcf, 0xe6, 0xc6, 0xef, 0xd3, 0xf3, 0xde, 0xfe, 0xd6, 0xf6, 0xdf, 0xff,
0x34, 0x8c, 0x3a, 0x80, 0x89, 0x32, 0x81, 0x3b, 0x94, 0x24, 0x98, 0x2a, 0x91, 0x22, 0x99, 0x2b,
0x64, 0x4c, 0x6a, 0x43, 0x4b, 0x62, 0x42, 0x6b, 0x54, 0x74, 0x5a, 0x7a, 0x52, 0x72, 0x5b, 0x7b,
0xa4, 0x1c, 0xa8, 0x13, 0x1b, 0xa1, 0x12, 0xa9, 0x04, 0xb4, 0x0a, 0xb8, 0x02, 0xb1, 0x0b, 0xb9,
0xe4, 0xcc, 0xe8, 0xc0, 0xc9, 0xe1, 0xc1, 0xe9, 0xd4, 0xf4, 0xd8, 0xf8, 0xd1, 0xf1, 0xd9, 0xf9,
0x33, 0x88, 0x3c, 0x84, 0x8d, 0x35, 0x85, 0x3d, 0x90, 0x23, 0x9c, 0x2c, 0x95, 0x25, 0x9d, 0x2d,
0x63, 0x4a, 0x6c, 0x44, 0x4d, 0x65, 0x45, 0x6d, 0x53, 0x73, 0x5c, 0x7c, 0x55, 0x75, 0x5d, 0x7d,
0xa0, 0x1a, 0xac, 0x14, 0x1d, 0xa5, 0x15, 0xad, 0x03, 0xb0, 0x0c, 0xbc, 0x05, 0xb5, 0x0d, 0xbd,
0xe0, 0xc8, 0xec, 0xc4, 0xcd, 0xe5, 0xc5, 0xed, 0xd0, 0xf0, 0xdc, 0xfc, 0xd5, 0xf5, 0xdd, 0xfd,
};
#ifdef ___SKINNY_LOOP
/*
 * Round Constants
 *
 * Two bytes (c0, c1) per round, starting with round 1: 80 bytes for the
 * 40-round build, 112 bytes when ___NUM_OF_ROUNDS_56 is defined.  Only the
 * loop form of RunEncryptionKeyScheduleTK3 reads this table, which is why it
 * exists only when ___SKINNY_LOOP is defined.
 */
unsigned char RC[]
= {
0x01, 0x00, 0x03, 0x00, 0x07, 0x00, 0x0f, 0x00, 0x0f, 0x01, 0x0e, 0x03, 0x0d, 0x03, 0x0b, 0x03,
0x07, 0x03, 0x0f, 0x02, 0x0e, 0x01, 0x0c, 0x03, 0x09, 0x03, 0x03, 0x03, 0x07, 0x02, 0x0e, 0x00,
0x0d, 0x01, 0x0a, 0x03, 0x05, 0x03, 0x0b, 0x02, 0x06, 0x01, 0x0c, 0x02, 0x08, 0x01, 0x00, 0x03,
0x01, 0x02, 0x02, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x07, 0x01, 0x0e, 0x02, 0x0c, 0x01, 0x08, 0x03,
0x01, 0x03, 0x03, 0x02, 0x06, 0x00, 0x0d, 0x00, 0x0b, 0x01, 0x06, 0x03, 0x0d, 0x02, 0x0a, 0x01,
#ifdef ___NUM_OF_ROUNDS_56
0x04, 0x03, 0x09, 0x02, 0x02, 0x01, 0x04, 0x02, 0x08, 0x00, 0x01, 0x01, 0x02, 0x02, 0x04, 0x00,
0x09, 0x00, 0x03, 0x01, 0x06, 0x02, 0x0c, 0x00, 0x09, 0x01, 0x02, 0x03, 0x05, 0x02, 0x0a, 0x00,
#endif
};
#endif
/* Routines defined in other translation units.  Encrypt() transforms `block`
   in place using the prepared round keys and the two substitution tables.
   The TK3 schedule takes the round-constant table only in the loop build. */
extern void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2);
extern void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys);
#ifdef ___SKINNY_LOOP
extern void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys, unsigned char *pRC);
#else
extern void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys);
#endif
/*
 * Encrypt `input` in place after loading all three tweakey parts.
 *
 * CNT (8 bytes), T (16 bytes) and K (16 bytes) are copied into the first
 * twelve 32-bit words of the round-key buffer (words 2 and 3 are left
 * untouched), with the upper four bytes of every 8-byte half stored in
 * rotated byte order.  The TK3 and TK2 key schedules are then run and the
 * block is encrypted.  Finally the context is switched to
 * skinny_128_384_enc12_12, so the next call skips the TK3 schedule.
 */
void skinny_128_384_enc123_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K)
{
  unsigned char *rk = pskinny_ctrl->roundKeys;
  uint32_t *word = (uint32_t*)&rk[0];

  /* Counter part: words 0-1. */
  word[0] = *(uint32_t*)(&CNT[0]);
  pack_word(CNT[7], CNT[4], CNT[5], CNT[6], word[1]);

  /* T part: words 4-7. */
  word[4] = *(uint32_t*)(&T[0]);
  pack_word(T[7], T[4], T[5], T[6], word[5]);
  word[6] = *(uint32_t*)(&T[8]);
  pack_word(T[15], T[12], T[13], T[14], word[7]);

  /* K part: words 8-11. */
  word[8] = *(uint32_t*)(&K[0]);
  pack_word(K[7], K[4], K[5], K[6], word[9]);
  word[10] = *(uint32_t*)(&K[8]);
  pack_word(K[15], K[12], K[13], K[14], word[11]);

  /* TK3 first: the TK2 schedule reads the TK3 round values. */
#ifndef ___SKINNY_LOOP
  RunEncryptionKeyScheduleTK3(rk);
#else
  RunEncryptionKeyScheduleTK3(rk, RC);
#endif
  RunEncryptionKeyScheduleTK2(rk);

  Encrypt(input, rk, SBOX, SBOX2);

  pskinny_ctrl->func_skinny_128_384_enc = skinny_128_384_enc12_12;
}
/*
 * Encrypt `input` in place after reloading the counter and T parts.
 *
 * CNT (8 bytes) and T (16 bytes) are copied into round-key words 0-1 and 4-7,
 * with the upper four bytes of every 8-byte half stored in rotated byte
 * order.  Only the TK2 key schedule is re-run; the TK3 round values already
 * in the buffer are reused.  K is not used.
 */
void skinny_128_384_enc12_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K)
{
(void)K;
/* roundKeys is a byte array: the conversion to uint32_t* needs an explicit
   cast, as in skinny_128_384_enc123_12. */
uint32_t *pt = (uint32_t*)&pskinny_ctrl->roundKeys[0];
pt[0] = *(uint32_t*)(&CNT[0]);
pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]);
pt[4] = *(uint32_t*)(&T[0]);
pack_word(T[7], T[4], T[5], T[6], pt[5]);
pt[6] = *(uint32_t*)(&T[8]);
pack_word(T[15], T[12], T[13], T[14], pt[7]);
RunEncryptionKeyScheduleTK2(pskinny_ctrl->roundKeys);
Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2);
}
/*
 * Encrypt `input` in place after reloading only the counter part.
 *
 * CNT (8 bytes) is copied into round-key words 0-1, the upper four bytes in
 * rotated byte order.  No key schedule is run: the round values already in
 * the buffer are reused.  T and K are not used.
 */
void skinny_128_384_enc1_1 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K)
{
(void)T;
(void)K;
/* roundKeys is a byte array: the conversion to uint32_t* needs an explicit
   cast, as in skinny_128_384_enc123_12. */
uint32_t *pt = (uint32_t*)&pskinny_ctrl->roundKeys[0];
pt[0] = *(uint32_t*)(&CNT[0]);
pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]);
Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2);
}
/*
 * PERMUTATION_TK1(): expand the eight bytes at roundKeys[0..7] into seven
 * further 8-byte groups, written back to back into roundKeys[8..63].  Each
 * group is a fixed rearrangement of the original eight bytes; no byte value
 * is modified.  The source bytes are copied into locals first, so the macro
 * only reads roundKeys[0..7] before it starts writing.
 * Expects a byte pointer named `roundKeys` at the expansion site.
 */
#define PERMUTATION_TK1() \
\
/* permutation */ \
{ \
unsigned char tmp0 = roundKeys[0]; \
unsigned char tmp1 = roundKeys[1]; \
unsigned char tmp2 = roundKeys[2]; \
unsigned char tmp3 = roundKeys[3]; \
unsigned char tmp4 = roundKeys[4]; \
unsigned char tmp5 = roundKeys[5]; \
unsigned char tmp6 = roundKeys[6]; \
unsigned char tmp7 = roundKeys[7]; \
\
unsigned char* dst = &roundKeys[8]; \
\
/* 5 7 2 3 6 0 4 1 */ \
*dst++ = tmp1; \
*dst++ = tmp4; \
*dst++ = tmp0; \
*dst++ = tmp6; \
*dst++ = tmp3; \
*dst++ = tmp2; \
*dst++ = tmp7; \
*dst++ = tmp5; \
\
/* 2 5 0 6 7 1 3 4 */ \
*dst++ = tmp4; \
*dst++ = tmp3; \
*dst++ = tmp1; \
*dst++ = tmp7; \
*dst++ = tmp6; \
*dst++ = tmp0; \
*dst++ = tmp5; \
*dst++ = tmp2; \
\
/* 0 2 1 7 5 4 6 3 */ \
*dst++ = tmp3; \
*dst++ = tmp6; \
*dst++ = tmp4; \
*dst++ = tmp5; \
*dst++ = tmp7; \
*dst++ = tmp1; \
*dst++ = tmp2; \
*dst++ = tmp0; \
\
/* 1 0 4 5 2 3 7 6 */ \
*dst++ = tmp6; \
*dst++ = tmp7; \
*dst++ = tmp3; \
*dst++ = tmp2; \
*dst++ = tmp5; \
*dst++ = tmp4; \
*dst++ = tmp0; \
*dst++ = tmp1; \
\
/* 4 1 3 2 0 6 5 7 */ \
*dst++ = tmp7; \
*dst++ = tmp5; \
*dst++ = tmp6; \
*dst++ = tmp0; \
*dst++ = tmp2; \
*dst++ = tmp3; \
*dst++ = tmp1; \
*dst++ = tmp4; \
\
/* 3 4 6 0 1 7 2 5 */ \
*dst++ = tmp5; \
*dst++ = tmp2; \
*dst++ = tmp7; \
*dst++ = tmp1; \
*dst++ = tmp0; \
*dst++ = tmp6; \
*dst++ = tmp4; \
*dst++ = tmp3; \
\
/* 6 3 7 1 4 5 0 2 */ \
*dst++ = tmp2; \
*dst++ = tmp0; \
*dst++ = tmp5; \
*dst++ = tmp4; \
*dst++ = tmp1; \
*dst++ = tmp7; \
*dst++ = tmp3; \
*dst++ = tmp6; \
}
/*
 * SBOX_0: substitutes four state bytes through `sbox` and writes them back
 * in the same positions. Leaves the looked-up values in the caller's
 * scratch variables t0..t3. Expects `sbox` and t0..t3 in scope.
 */
#define SBOX_0(x0, x1, x2, x3) \
do { \
    t0 = sbox[(x0)]; \
    t1 = sbox[(x1)]; \
    t2 = sbox[(x2)]; \
    t3 = sbox[(x3)]; \
    (x0) = (uint8_t)t0; \
    (x1) = (uint8_t)t1; \
    (x2) = (uint8_t)t2; \
    (x3) = (uint8_t)t3; \
} while (0)
/*
 * SBOX_8: substitutes four state bytes through `sbox` and writes them back
 * rotated by one position (output order: 3, 0, 1, 2). Leaves the looked-up
 * values in t0..t3. Expects `sbox` and t0..t3 in scope.
 */
#define SBOX_8(x0, x1, x2, x3) \
do { \
    t0 = sbox[(x0)]; \
    t1 = sbox[(x1)]; \
    t2 = sbox[(x2)]; \
    t3 = sbox[(x3)]; \
    (x0) = (uint8_t)t3; \
    (x1) = (uint8_t)t0; \
    (x2) = (uint8_t)t1; \
    (x3) = (uint8_t)t2; \
} while (0)
/*
 * SBOX_16: substitutes four state bytes and writes them back rotated by two
 * positions (output order: 2, 3, 0, 1). The first input byte goes through
 * `sbox2` (the table that folds in the AC(c2) constant, per the original
 * note); the other three go through `sbox`. Leaves the looked-up values in
 * t0..t3. Expects `sbox`, `sbox2` and t0..t3 in scope.
 */
#define SBOX_16(x0, x1, x2, x3) \
do { \
    t0 = sbox2[(x0)]; /* AC(c2) */ \
    t1 = sbox[(x1)]; \
    t2 = sbox[(x2)]; \
    t3 = sbox[(x3)]; \
    (x0) = (uint8_t)t2; \
    (x1) = (uint8_t)t3; \
    (x2) = (uint8_t)t0; \
    (x3) = (uint8_t)t1; \
} while (0)
/*
 * SBOX_24: substitutes four state bytes through `sbox` and writes them back
 * rotated by three positions (output order: 1, 2, 3, 0). Leaves the
 * looked-up values in t0..t3. Expects `sbox` and t0..t3 in scope.
 */
#define SBOX_24(x0, x1, x2, x3) \
do { \
    t0 = sbox[(x0)]; \
    t1 = sbox[(x1)]; \
    t2 = sbox[(x2)]; \
    t3 = sbox[(x3)]; \
    (x0) = (uint8_t)t1; \
    (x1) = (uint8_t)t2; \
    (x2) = (uint8_t)t3; \
    (x3) = (uint8_t)t0; \
} while (0)
#ifdef ___ENABLE_DWORD_CAST
/*
 * SKINNY_MAIN (64-bit-word variant): one invocation performs two
 * consecutive rounds ("odd" then "even") on the 16-byte state `block`.
 *
 * Expects in the expanding scope: block, sbox and sbox2 (used by the SBOX_*
 * macros), the round-key cursors tk1 and tk2 (uint64_t *), and the scratch
 * variables t0..t3 (uint64_t).
 *
 *  - odd half:  S-box lookups, then *tk1 and *tk2 are XORed into the first
 *               eight state bytes;
 *  - even half: S-box lookups, then only *tk2 is XORed in.
 * Each half ends with the XOR-based mixing under "MC" and stores four
 * 32-bit words back into block.
 *
 * Side effects: tk1 advances by one word and tk2 by two words per
 * invocation; t0..t3 are clobbered.
 */
#define SKINNY_MAIN() \
{ \
\
/* odd */ \
\
/* LUT(with ShiftRows & AC(c2))*/ \
\
SBOX_0( block[0], block[1], block[2], block[3]); \
SBOX_8( block[4], block[5], block[6], block[7]); \
SBOX_16(block[8], block[9], block[10], block[11]); \
SBOX_24(block[12], block[13], block[14], block[15]); \
\
/* TK1^TK2^TK3^AC(c0 c1) */ \
\
t1 = *(uint64_t*)&block[0]; \
t1 ^= *tk1++; \
t1 ^= *tk2++; \
\
/* MC */ \
\
t2 = *(uint64_t*)&block[8]; \
t0 = t2 >> 32; \
\
/* 0^2 */ \
t3 = t1 ^ t2; \
\
/* 1^2 */ \
t2 = (t1 >> 32) ^ t2; \
\
/* 0^2^3 */ \
t0 = t0 ^ t3; \
\
*(uint32_t*)&block[0] = (uint32_t)t0; \
*(uint32_t*)&block[4] = (uint32_t)t1; \
*(uint32_t*)&block[8] = (uint32_t)t2; \
*(uint32_t*)&block[12] = (uint32_t)t3; \
\
/* even */ \
\
/* LUT(with ShiftRows & AC(c2))*/ \
\
SBOX_0( block[0], block[1], block[2], block[3]); \
SBOX_8( block[4], block[5], block[6], block[7]); \
SBOX_16(block[8], block[9], block[10], block[11]); \
SBOX_24(block[12], block[13], block[14], block[15]); \
\
/* TK2^TK3^AC(c0 c1) */ \
\
t1 = *(uint64_t*)&block[0]; \
t1 ^= *tk2++; \
\
/* MC */ \
\
t2 = *(uint64_t*)&block[8]; \
t0 = t2 >> 32; \
\
/* 0^2 */ \
t3 = t1 ^ t2; \
\
/* 1^2 */ \
t2 = (t1 >> 32) ^ t2; \
\
/* 0^2^3 */ \
t0 = t0 ^ t3; \
\
*(uint32_t*)&block[0] = (uint32_t)t0; \
*(uint32_t*)&block[4] = (uint32_t)t1; \
*(uint32_t*)&block[8] = (uint32_t)t2; \
*(uint32_t*)&block[12] = (uint32_t)t3; \
}
#ifndef ___SKINNY_LOOP
/*
 * Encrypts the 16-byte `block` in place (64-bit-word, fully unrolled build).
 *
 * roundKeys: round-key buffer. Bytes 0..7 hold the current TK1 value;
 *            PERMUTATION_TK1 expands it into bytes 8..63. The tk2 cursor
 *            starts at byte 64 and only moves forward.
 * sbox, sbox2: lookup tables consumed by the SBOX_* macros.
 *
 * Each SKINNY_MAIN() is two rounds. The tk1 cursor is rewound to the start
 * of roundKeys after every 8 invocations (16 rounds), which is as far as the
 * expanded TK1 data reaches. 20 invocations are issued in total, or 28 when
 * ___NUM_OF_ROUNDS_56 is defined.
 */
void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2)
{
uint64_t *tk1;
uint64_t *tk2;
uint64_t t0; // used in MACRO
uint64_t t1; // used in MACRO
uint64_t t2; // used in MACRO
uint64_t t3; // used in MACRO
// TK1
PERMUTATION_TK1();
// SB+AC+ShR+MC
tk2 = (uint64_t*)&roundKeys[64];
tk1 = (uint64_t*)&roundKeys[0];
// 1st, ...,16th round
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
// rewind TK1 cursor; tk2 keeps advancing
tk1 = (uint64_t*)&roundKeys[0];
// 17th, ...,32th round
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
tk1 = (uint64_t*)&roundKeys[0];
// 33th, ...,40th round
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
#ifdef ___NUM_OF_ROUNDS_56
// 41th, ...,48th round
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
tk1 = (uint64_t*)&roundKeys[0];
// 49th, ... ,56th round
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
#endif
}
#else /* ___SKINNY_LOOP */
/*
 * Encrypts the 16-byte `block` in place (64-bit-word, looped build).
 *
 * roundKeys: round-key buffer. Bytes 0..7 hold the current TK1 value;
 *            PERMUTATION_TK1 expands it into bytes 8..63. The tk2 cursor
 *            starts at byte 64 and only moves forward.
 * sbox, sbox2: lookup tables consumed by the SBOX_* macros.
 *
 * Each SKINNY_MAIN() is two rounds; the tk1 cursor is rewound every 8
 * invocations.
 */
void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2)
{
    /* The names below are referenced by SKINNY_MAIN / SBOX_* and must not
       be renamed. */
    uint64_t *tk1;
    uint64_t *tk2;
    uint64_t t0;
    uint64_t t1;
    uint64_t t2;
    uint64_t t3;
#ifndef ___NUM_OF_ROUNDS_56
    const int double_rounds = 2 * 8 + 4;   /* 40 rounds */
#else
    const int double_rounds = 3 * 8 + 4;   /* 56 rounds */
#endif

    /* TK1 */
    PERMUTATION_TK1();

    /* SB+AC+ShR+MC */
    tk2 = (uint64_t*)&roundKeys[64];
    for (int step = 0; step < double_rounds; step++) {
        if ((step % 8) == 0) {
            /* expanded TK1 data covers 8 invocations; start over */
            tk1 = (uint64_t*)&roundKeys[0];
        }
        SKINNY_MAIN();
    }
}
#endif /* ___SKINNY_LOOP */
#else /* ___ENABLE_DWORD_CAST */
/*
 * SKINNY_MAIN (32-bit-word variant): one invocation performs two
 * consecutive rounds ("odd" then "even") on the 16-byte state `block`.
 *
 * Expects in the expanding scope: block, sbox and sbox2 (used by the SBOX_*
 * macros), the round-key cursors tk1 and tk2 (uint32_t *), and the scratch
 * variables t0..t4 (uint32_t).
 *
 *  - odd half:  S-box lookups, then two words from tk1 and two from tk2 are
 *               XORed into the first eight state bytes;
 *  - even half: S-box lookups, then only two words from tk2 are XORed in.
 * Each half ends with the XOR-based mixing under "MC" and stores four
 * 32-bit words back into block.
 *
 * Side effects: tk1 advances by two words and tk2 by four words per
 * invocation; t0..t4 are clobbered.
 */
#define SKINNY_MAIN() \
{ \
\
/* odd */ \
\
/* LUT(with ShiftRows & AC(c2))*/ \
\
SBOX_0( block[0], block[1], block[2], block[3]); \
SBOX_8( block[4], block[5], block[6], block[7]); \
SBOX_16(block[8], block[9], block[10], block[11]); \
SBOX_24(block[12], block[13], block[14], block[15]); \
\
/* TK1^TK2^TK3^AC(c0 c1) */ \
\
t1 = *(uint32_t*)&block[0]; \
t0 = *(uint32_t*)&block[4]; \
t1 ^= *tk1++; \
t1 ^= *tk2++; \
t0 ^= *tk1++; \
t0 ^= *tk2++; \
\
/* MC */ \
\
t2 = *(uint32_t*)&block[8]; \
t4 = *(uint32_t*)&block[12]; \
\
/* 0^2 */ \
t3 = t1 ^ t2; \
\
/* 1^2 */ \
t2 = t0 ^ t2; \
\
/* 0^2^3 */ \
t0 = t3 ^ t4; \
\
*(uint32_t*)&block[0] = t0; \
*(uint32_t*)&block[4] = t1; \
*(uint32_t*)&block[8] = t2; \
*(uint32_t*)&block[12] = t3; \
\
/* even */ \
\
/* LUT(with ShiftRows & AC(c2))*/ \
\
SBOX_0( block[0], block[1], block[2], block[3]); \
SBOX_8( block[4], block[5], block[6], block[7]); \
SBOX_16(block[8], block[9], block[10], block[11]); \
SBOX_24(block[12], block[13], block[14], block[15]); \
\
/* TK2^TK3^AC(c0 c1) */ \
\
t1 = *(uint32_t*)&block[0]; \
t0 = *(uint32_t*)&block[4]; \
t1 ^= *tk2++; \
t0 ^= *tk2++; \
\
/* MC */ \
\
t2 = *(uint32_t*)&block[8]; \
t4 = *(uint32_t*)&block[12]; \
\
/* 0^2 */ \
t3 = t1 ^ t2; \
\
/* 1^2 */ \
t2 = t0 ^ t2; \
\
/* 0^2^3 */ \
t0 = t3 ^ t4; \
\
*(uint32_t*)&block[0] = t0; \
*(uint32_t*)&block[4] = t1; \
*(uint32_t*)&block[8] = t2; \
*(uint32_t*)&block[12] = t3; \
}
#ifndef ___SKINNY_LOOP
/*
 * Encrypts the 16-byte `block` in place (32-bit-word, fully unrolled build).
 *
 * roundKeys: round-key buffer. Bytes 0..7 hold the current TK1 value;
 *            PERMUTATION_TK1 expands it into bytes 8..63. The tk2 cursor
 *            starts at byte 64 and only moves forward.
 * sbox, sbox2: lookup tables consumed by the SBOX_* macros.
 *
 * Each SKINNY_MAIN() is two rounds. The tk1 cursor is rewound to the start
 * of roundKeys after every 8 invocations (16 rounds), which is as far as the
 * expanded TK1 data reaches. 20 invocations are issued in total, or 28 when
 * ___NUM_OF_ROUNDS_56 is defined.
 */
void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2)
{
uint32_t *tk1;
uint32_t *tk2;
uint32_t t0; // used in MACRO
uint32_t t1; // used in MACRO
uint32_t t2; // used in MACRO
uint32_t t3; // used in MACRO
uint32_t t4; // used in MACRO
// TK1
PERMUTATION_TK1();
// SB+AC+ShR+MC
tk2 = (uint32_t*)&roundKeys[64];
tk1 = (uint32_t*)&roundKeys[0];
// 1st, ...,16th round
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
// rewind TK1 cursor; tk2 keeps advancing
tk1 = (uint32_t*)&roundKeys[0];
// 17th, ...,32th round
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
tk1 = (uint32_t*)&roundKeys[0];
// 33th, ...,40th round
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
#ifdef ___NUM_OF_ROUNDS_56
// 41th, ...,48th round
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
tk1 = (uint32_t*)&roundKeys[0];
// 49th, ... ,56th round
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
#endif
}
#else /* ___SKINNY_LOOP */
/*
 * Encrypts the 16-byte `block` in place (32-bit-word, looped build).
 *
 * roundKeys: round-key buffer. Bytes 0..7 hold the current TK1 value;
 *            PERMUTATION_TK1 expands it into bytes 8..63. The tk2 cursor
 *            starts at byte 64 and only moves forward.
 * sbox, sbox2: lookup tables consumed by the SBOX_* macros.
 *
 * Each SKINNY_MAIN() is two rounds; the tk1 cursor is rewound every 8
 * invocations.
 */
void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2)
{
    /* The names below are referenced by SKINNY_MAIN / SBOX_* and must not
       be renamed. */
    uint32_t *tk1;
    uint32_t *tk2;
    uint32_t t0;
    uint32_t t1;
    uint32_t t2;
    uint32_t t3;
    uint32_t t4;
#ifndef ___NUM_OF_ROUNDS_56
    const int double_rounds = 2 * 8 + 4;   /* 40 rounds */
#else
    const int double_rounds = 3 * 8 + 4;   /* 56 rounds */
#endif

    /* TK1 */
    PERMUTATION_TK1();

    /* SB+AC+ShR+MC */
    tk2 = (uint32_t*)&roundKeys[64];
    for (int step = 0; step < double_rounds; step++) {
        if ((step % 8) == 0) {
            /* expanded TK1 data covers 8 invocations; start over */
            tk1 = (uint32_t*)&roundKeys[0];
        }
        SKINNY_MAIN();
    }
}
#endif /* ___SKINNY_LOOP */
#endif /* ___ENABLE_DWORD_CAST */
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
/*
* Date: 29 November 2018
* Contact: Thomas Peyrin - thomas.peyrin@gmail.com
* Mustafa Khairallah - mustafam001@e.ntu.edu.sg
*/
#include "crypto_aead.h"
#include "api.h"
#include "skinny.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Builds a 16-byte padded block in mp from the first len8 bytes of m.
 *
 *   len8 == 0        : mp is all zero.
 *   0 < len8 < 16    : mp = m[0..len8-1], zero fill, and mp[15] = len8.
 *   len8 >= 16       : mp = m[0..15].
 *
 * Only len8 bytes of m are read (at most 16), so a short final block never
 * causes a read past the end of the caller's buffer. Access is byte-wise,
 * so neither pointer needs any particular alignment. A negative len8 is
 * treated like 0.
 */
void pad (const unsigned char* m, unsigned char* mp, int len8) {
    int n = len8;
    int i;

    if (n > 16) {
        n = 16;
    }
    if (n < 0) {
        n = 0;
    }
    for (i = 0; i < n; i++) {
        mp[i] = m[i];
    }
    for (; i < 16; i++) {
        mp[i] = 0;
    }
    if (n > 0 && n < 16) {
        /* partial block: record its length in the last byte */
        mp[15] = (unsigned char)n;
    }
}
/*
 * Applies the byte-wise G map to the 16-byte state s and stores the result
 * in c: each output byte is the input byte shifted right by one, with the
 * new top bit set to (bit 7 XOR bit 0) of the input byte.
 *
 * Works byte by byte so that c may be an arbitrarily aligned pointer into a
 * caller's message/ciphertext buffer. s and c may be the same buffer; they
 * must not partially overlap.
 */
void g8A (unsigned char* s, unsigned char* c) {
    int i;

    for (i = 0; i < 16; i++) {
        const unsigned int x = s[i];
        c[i] = (unsigned char)((x >> 1) ^ ((x ^ (x << 7)) & 0x80u));
    }
}
/*
 * Same byte-wise G map as g8A, intended for writing the tag into a buffer
 * that is not word aligned: each output byte is the input byte shifted
 * right by one, with the new top bit set to (bit 7 XOR bit 0) of the input.
 *
 * Both the read of s and the write of c are byte-wise, so the result does
 * not depend on host byte order or on the alignment of either pointer.
 */
void g8A_for_Tag_Generation (unsigned char* s, unsigned char* c) {
    int i;

    for (i = 0; i < 16; i++) {
        const unsigned int x = s[i];
        c[i] = (unsigned char)((x >> 1) ^ ((x ^ (x << 7)) & 0x80u));
    }
}
/*
 * Absorbs one full 16-byte block into the state: s ^= m.
 *
 * m points into the caller's associated-data/message buffer and may have
 * any alignment, so the XOR is done byte by byte.
 */
void rho_ad_eqov16 (
const unsigned char* m,
unsigned char* s) {
    int i;

    for (i = 0; i < 16; i++) {
        s[i] ^= m[i];
    }
}
/*
 * Absorbs a (possibly partial) block of len8 bytes into the state.
 *
 * Equivalent to XORing the padded block into s: s[i] ^= m[i] for
 * i < len8, and for 0 < len8 < 16 the length byte is XORed into s[15].
 * len8 == 0 leaves s unchanged; len8 >= 16 absorbs exactly 16 bytes.
 *
 * Only len8 bytes of m are read, and all access is byte-wise.
 */
void rho_ad_ud16 (
const unsigned char* m,
unsigned char* s,
int len8) {
    int n = len8;
    int i;

    if (n > 16) {
        n = 16;
    }
    for (i = 0; i < n; i++) {
        s[i] ^= m[i];
    }
    if (n > 0 && n < 16) {
        /* length marker that the padding places in the last byte */
        s[15] ^= (unsigned char)n;
    }
}
/*
 * rho for one full 16-byte block (encryption direction).
 *
 * For every byte: c = G(s) ^ m, then s ^= m, where G is the byte map used
 * by g8A (shift right by one, new top bit = bit 7 XOR bit 0).
 *
 * m and c point into caller buffers of arbitrary alignment, so everything
 * is done byte by byte.
 */
void rho_eqov16 (
const unsigned char* m,
unsigned char* c,
unsigned char* s) {
    int i;

    for (i = 0; i < 16; i++) {
        const unsigned int st = s[i];
        const unsigned char in = m[i];
        const unsigned char ks = (unsigned char)((st >> 1) ^ ((st ^ (st << 7)) & 0x80u));

        c[i] = (unsigned char)(ks ^ in);
        s[i] = (unsigned char)(st ^ in);
    }
}
/*
 * rho for a final, possibly partial block of len8 bytes (encryption
 * direction).
 *
 * For i < len8: c[i] = G(s[i]) ^ m[i] and s[i] ^= m[i], where G is the byte
 * map used by g8A. For 0 < len8 < 16 the length byte of the padding is also
 * XORed into s[15]. len8 >= 16 processes exactly 16 bytes.
 *
 * Exactly len8 bytes of m are read and exactly len8 bytes of c are written;
 * bytes of c beyond the block are left untouched. All access is byte-wise.
 */
void rho_ud16 (
const unsigned char* m,
unsigned char* c,
unsigned char* s,
int len8) {
    int n = len8;
    int i;

    if (n > 16) {
        n = 16;
    }
    for (i = 0; i < n; i++) {
        const unsigned int st = s[i];
        const unsigned char in = m[i];
        const unsigned char ks = (unsigned char)((st >> 1) ^ ((st ^ (st << 7)) & 0x80u));

        c[i] = (unsigned char)(ks ^ in);
        s[i] = (unsigned char)(st ^ in);
    }
    if (n > 0 && n < 16) {
        /* length marker that the padding places in the last byte */
        s[15] ^= (unsigned char)n;
    }
}
/*
 * Inverse rho for one full 16-byte block (decryption direction).
 *
 * For every byte: m = G(s) ^ c, then s ^= m, where G is the byte map used
 * by g8A (shift right by one, new top bit = bit 7 XOR bit 0).
 *
 * m and c point into caller buffers of arbitrary alignment, so everything
 * is done byte by byte.
 */
void irho_eqov16 (
unsigned char* m,
const unsigned char* c,
unsigned char* s) {
    int i;

    for (i = 0; i < 16; i++) {
        const unsigned int st = s[i];
        const unsigned char ks = (unsigned char)((st >> 1) ^ ((st ^ (st << 7)) & 0x80u));
        const unsigned char pt = (unsigned char)(ks ^ c[i]);

        m[i] = pt;
        s[i] = (unsigned char)(st ^ pt);
    }
}
/*
 * Inverse rho for a final, possibly partial block of len8 bytes
 * (decryption direction).
 *
 * For i < len8: m[i] = G(s[i]) ^ c[i] and s[i] ^= m[i], where G is the byte
 * map used by g8A. For 0 < len8 < 16 the length byte of the padding is also
 * XORed into s[15]. len8 >= 16 processes exactly 16 bytes.
 *
 * Exactly len8 bytes of c are read and exactly len8 bytes of m are written,
 * so a plaintext buffer sized to the message length is never overrun by a
 * short last block. All access is byte-wise.
 */
void irho_ud16 (
unsigned char* m,
const unsigned char* c,
unsigned char* s,
int len8) {
    int n = len8;
    int i;

    if (n > 16) {
        n = 16;
    }
    for (i = 0; i < n; i++) {
        const unsigned int st = s[i];
        const unsigned char ks = (unsigned char)((st >> 1) ^ ((st ^ (st << 7)) & 0x80u));
        const unsigned char pt = (unsigned char)(ks ^ c[i]);

        m[i] = pt;
        s[i] = (unsigned char)(st ^ pt);
    }
    if (n > 0 && n < 16) {
        /* length marker that the padding places in the last byte */
        s[15] ^= (unsigned char)n;
    }
}
/*
 * Resets the 8-byte counter buffer: the first machine word is set to 1 and
 * everything else to 0 (all 8 bytes are written). CNT is accessed through
 * word pointers, exactly as the rest of the counter code does.
 */
void reset_lfsr_gf56 (unsigned char* CNT) {
#ifdef ___ENABLE_DWORD_CAST
    uint64_t *ctr = (uint64_t*)CNT;

    ctr[0] = 1;   /* CNT7 .. CNT0 */
#else
    uint32_t *ctr = (uint32_t*)CNT;

    ctr[0] = 1;   /* CNT3 CNT2 CNT1 CNT0 */
    ctr[1] = 0;   /* CNT7 CNT6 CNT5 CNT4 */
#endif
}
/*
 * Advances the counter by one step: the 8-byte value in CNT is shifted left
 * by one bit, and if bit 7 of CNT[6] was set before the shift, 0x95 is XORed
 * into the low word. CNT[7] takes part in the shift as well (callers such
 * as block_cipher overwrite it before use).
 */
void lfsr_gf56 (unsigned char* CNT) {
    /* feedback is decided on the pre-shift value */
    const int feedback = (CNT[6] & 0x80) != 0;
#ifdef ___ENABLE_DWORD_CAST
    uint64_t *ctr = (uint64_t*)CNT;
    uint64_t value = ctr[0] << 1;   /* CNT7 .. CNT0 */

    if (feedback) {
        value ^= 0x95;
    }
    ctr[0] = value;
#else
    uint32_t *ctr = (uint32_t*)CNT;
    const uint32_t lo = ctr[0];     /* CNT3 CNT2 CNT1 CNT0 */
    const uint32_t hi = ctr[1];     /* CNT7 CNT6 CNT5 CNT4 */
    uint32_t new_lo = lo << 1;

    if (feedback) {
        new_lo ^= 0x95;
    }
    ctr[0] = new_lo;
    ctr[1] = (hi << 1) | (lo >> 31);   /* carry the top bit of the low word */
#endif
}
/*
 * Runs one block-cipher call on the state s.
 *
 * The domain byte D is stored in CNT[7], then the encryption routine
 * currently selected in the control block is invoked with the state, the
 * counter, the tweak T and the key k.
 */
void block_cipher(
unsigned char* s,
const unsigned char* k, unsigned char* T,
unsigned char* CNT, unsigned char D,
skinny_ctrl* p_skinny_ctrl) {
    *(CNT + 7) = D;
    (*p_skinny_ctrl->func_skinny_128_384_enc)(s, p_skinny_ctrl, CNT, T, k);
}
/*
 * Block-cipher call that uses the nonce N as the tweak.
 * Thin wrapper around block_cipher; the const qualifier on N is dropped
 * only because block_cipher takes a non-const tweak pointer.
 */
void nonce_encryption (
const unsigned char* N,
unsigned char* CNT,
unsigned char*s, const unsigned char* k,
unsigned char D,
skinny_ctrl* p_skinny_ctrl) {
    unsigned char *tweak = (unsigned char*)N;

    block_cipher(s, k, tweak, CNT, D, p_skinny_ctrl);
}
/*
 * Writes the 16-byte tag derived from the state s at *c, then moves *c
 * forward by 16 and back by *clen.
 */
void generate_tag (
unsigned char** c, unsigned char* s,
unsigned long long* clen) {
    unsigned char *out = *c;

    g8A_for_Tag_Generation(s, out);
    out = out + 16;
    *c = out - *clen;
}
/*
 * Encrypts the next message block and advances both cursors.
 *
 * A full block (mlen >= 16) goes through rho_eqov16, a shorter one through
 * rho_ud16. *M and *c are advanced by the number of bytes processed, the
 * counter is stepped, and if message bytes remain the state is run through
 * the nonce-tweaked block cipher with domain D.
 *
 * Returns the number of message bytes still to be processed.
 */
unsigned long long msg_encryption (
const unsigned char** M, unsigned char** c,
const unsigned char* N,
unsigned char* CNT,
unsigned char*s, const unsigned char* k,
unsigned char D,
unsigned long long mlen,
skinny_ctrl* l_skinny_ctrl) {
    int taken;

    if (mlen < 16) {
        taken = (int)mlen;
        mlen = 0;
        rho_ud16(*M, *c, s, taken);
    } else {
        taken = 16;
        mlen -= 16;
        rho_eqov16(*M, *c, s);
    }
    *c += taken;
    *M += taken;
    lfsr_gf56(CNT);
    if (mlen != 0) {
        nonce_encryption(N, CNT, s, k, D, l_skinny_ctrl);
    }
    return mlen;
}
/*
 * Decrypts the next ciphertext block and advances both cursors.
 *
 * A full block (clen >= 16) goes through irho_eqov16, a shorter one through
 * irho_ud16. *M and *c are advanced by the number of bytes processed, the
 * counter is stepped, and the state is then always run through the
 * nonce-tweaked block cipher with domain D.
 *
 * Returns the number of ciphertext bytes still to be processed.
 */
unsigned long long msg_decryption (
unsigned char** M, const unsigned char** c,
const unsigned char* N,
unsigned char* CNT,
unsigned char*s, const unsigned char* k,
unsigned char D,
unsigned long long clen,
skinny_ctrl* l_skinny_ctrl) {
    int taken;

    if (clen < 16) {
        taken = (int)clen;
        clen = 0;
        irho_ud16(*M, *c, s, taken);
    } else {
        taken = 16;
        clen -= 16;
        irho_eqov16(*M, *c, s);
    }
    *c += taken;
    *M += taken;
    lfsr_gf56(CNT);
    nonce_encryption(N, CNT, s, k, D, l_skinny_ctrl);
    return clen;
}
/*
 * Consumes up to 16 bytes at *M as a tweak block: the bytes are padded into
 * a local 16-byte tweak, the state is run through the block cipher with
 * that tweak and domain D, the counter is stepped and *M is advanced.
 *
 * Returns the number of bytes still to be processed.
 */
unsigned long long ad2msg_encryption (
const unsigned char** M,
unsigned char* CNT,
unsigned char*s, const unsigned char* k,
unsigned char D,
unsigned long long mlen,
skinny_ctrl* l_skinny_ctrl) {
    unsigned char T [16];
    const int taken = (mlen <= 16) ? (int)mlen : 16;

    mlen -= (unsigned long long)taken;
    pad(*M, T, taken);
    block_cipher(s, k, T, CNT, D, l_skinny_ctrl);
    lfsr_gf56(CNT);
    *M += taken;
    return mlen;
}
/*
 * Processes up to two blocks (32 bytes) of input at *A.
 *
 * The first block (up to 16 bytes) is XORed into the state and the counter
 * is stepped. If input remains, the next block (up to 16 bytes) is padded
 * and used as the tweak of a block-cipher call with domain D, after which
 * the counter is stepped again. *A is advanced past everything consumed.
 *
 * Returns the number of bytes still to be processed.
 */
unsigned long long ad_encryption (
const unsigned char** A, unsigned char* s,
const unsigned char* k, unsigned long long adlen,
unsigned char* CNT,
unsigned char D,
skinny_ctrl* l_skinny_ctrl) {
    unsigned char T [16];
    int taken;

    /* first half: absorb into the state */
    if (adlen < 16) {
        taken = (int)adlen;
        adlen = 0;
        rho_ad_ud16(*A, s, taken);
    } else {
        taken = 16;
        adlen -= 16;
        rho_ad_eqov16(*A, s);
    }
    *A += taken;
    lfsr_gf56(CNT);

    if (adlen == 0) {
        return adlen;
    }

    /* second half: use as tweak */
    taken = (adlen >= 16) ? 16 : (int)adlen;
    adlen -= (unsigned long long)taken;
    pad(*A, T, taken);
    *A += taken;
    block_cipher(s, k, T, CNT, D, l_skinny_ctrl);
    lfsr_gf56(CNT);
    return adlen;
}
/*
 * AEAD encryption.
 *
 * Phase 1 (authentication): the associated data and then the plaintext are
 * absorbed into a zero-initialised state; a final nonce-tweaked block-cipher
 * call with the domain byte w yields the 16-byte tag T.
 * Phase 2 (encryption): the state is re-seeded with T and the plaintext is
 * encrypted block by block with domain 36.
 *
 * c    receives mlen ciphertext bytes followed by the 16-byte tag.
 * clen receives mlen + 16.
 * nsec is unused. Always returns 0.
 *
 * The domain byte w starts at 48 and has a selector XORed in for the
 * associated-data length and another for the message length; bit 3 of w
 * decides whether the first message block continues the associated-data
 * pairing as a tweak block.
 */
int crypto_aead_encrypt (
unsigned char* c, unsigned long long* clen,
const unsigned char* m, unsigned long long mlen,
const unsigned char* ad, unsigned long long adlen,
const unsigned char* nsec,
const unsigned char* npub,
const unsigned char* k) {
    unsigned char s[16];
    unsigned char CNT[8];
    unsigned char T[16];
    const unsigned char* N = npub;
    const unsigned char* mauth = m;     /* cursor for the authentication pass */
    unsigned long long xlen = mlen;     /* bytes left to authenticate */
    unsigned long long rem;
    unsigned char w;
    skinny_ctrl l_skinny_ctrl;
    int i;

    (void)nsec;
    l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc123_12;

    for (i = 0; i < 16; i++) {
        s[i] = 0;
    }
    reset_lfsr_gf56(CNT);

    /* ---- domain byte ---- */
    w = 48;
    rem = adlen % 32;
    if (adlen == 0 || (rem != 0 && rem < 16)) {
        w ^= 2;
    } else if (rem == 0) {
        w ^= 8;
    } else if (rem > 16) {
        w ^= 10;
    }                                   /* rem == 16: nothing to add */
    rem = xlen % 32;
    if (xlen == 0 || (rem != 0 && rem < 16)) {
        w ^= 1;
    } else if (rem == 0) {
        w ^= 4;
    } else if (rem > 16) {
        w ^= 5;
    }                                   /* rem == 16: nothing to add */

    /* ---- authentication pass ---- */
    if (adlen == 0) {                   /* AD is an empty string */
        lfsr_gf56(CNT);
    } else {
        while (adlen > 0) {
            adlen = ad_encryption(&ad, s, k, adlen, CNT, 40, &l_skinny_ctrl);
        }
    }
    if ((w & 8) == 0) {
        xlen = ad2msg_encryption(&mauth, CNT, s, k, 44, xlen, &l_skinny_ctrl);
    } else if (mlen == 0) {
        lfsr_gf56(CNT);
    }
    while (xlen > 0) {
        xlen = ad_encryption(&mauth, s, k, xlen, CNT, 44, &l_skinny_ctrl);
    }
    nonce_encryption(N, CNT, s, k, w, &l_skinny_ctrl);

    /* Tag generation */
    g8A(s, T);

    /* ---- encryption pass, seeded with the tag ---- */
    /* The round keys for N and k are already expanded by the calls above,
       so only the counter changes from here on. */
    l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc1_1;
    reset_lfsr_gf56(CNT);
    for (i = 0; i < 16; i++) {
        s[i] = T[i];
    }
    *clen = mlen + 16;
    if (mlen > 0) {
        nonce_encryption(N, CNT, s, k, 36, &l_skinny_ctrl);
        while (mlen > 16) {
            mlen = msg_encryption(&m, &c, N, CNT, s, k, 36, mlen, &l_skinny_ctrl);
        }
        rho_ud16(m, c, s, (int)mlen);
        c = c + mlen;
    }

    /* Tag concatenation */
    for (i = 0; i < 16; i++) {
        c[i] = T[i];
    }
    return 0;
}
int crypto_aead_decrypt(
unsigned char *m,unsigned long long *mlen,
unsigned char *nsec,
const unsigned char *c,unsigned long long clen,
const unsigned char *ad,unsigned long long adlen,
const unsigned char *npub,
const unsigned char *k) {
unsigned char s[16];
unsigned char CNT[8];
unsigned char T[16];
const unsigned char* N;
unsigned char w;
unsigned long long xlen;
const unsigned char* mauth;
unsigned char* p1;
unsigned char* p2;
skinny_ctrl l_skinny_ctrl;
l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc123_12;
(void)nsec;
mauth = m;
N = npub;
xlen = clen-16;
#ifdef ___ENABLE_DWORD_CAST
*(uint64_t*)(&s[0]) = 0;
*(uint64_t*)(&s[8]) = 0;
#else
*(uint32_t*)(&s[0]) = 0;
*(uint32_t*)(&s[4]) = 0;
*(uint32_t*)(&s[8]) = 0;
*(uint32_t*)(&s[12]) = 0;
#endif
reset_lfsr_gf56(CNT);
w = 48;
if (adlen == 0) {
w = w ^ 2;
if (xlen == 0) {
w =w ^ 1;
}
else if (xlen%(32) == 0) {
w = w ^ 4;
}
else if (xlen%(32) < 16) {
w = w ^ 1;
}
else if (xlen%(32) == 16) {
w = w ^ 0;
}
else {
w = w ^ 5;
}
}
else if (adlen%(32) == 0) {
w = w ^ 8;
if (xlen == 0) {
w =w ^ 1;
}
else if (xlen%(32) == 0) {
w = w ^ 4;
}
else if (xlen%(32) < 16) {
w = w ^ 1;
}
else if (xlen%(32) == 16) {
w = w ^ 0;
}
else {
w = w ^ 5;
}
}
else if (adlen%(32) < 16) {
w = w ^ 2;
if (xlen == 0) {
w =w ^ 1;
}
else if (xlen%(32) == 0) {
w = w ^ 4;
}
else if (xlen%(32) < 16) {
w = w ^ 1;
}
else if (xlen%(32) == 16) {
w = w ^ 0;
}
else {
w = w ^ 5;
}
}
else if (adlen%(32) == 16) {
w = w ^ 0;
if (xlen == 0) {
w =w ^ 1;
}
else if (xlen%(32) == 0) {
w = w ^ 4;
}
else if (xlen%(32) < 16) {
w = w ^ 1;
}
else if (xlen%(32) == 16) {
w = w ^ 0;
}
else {
w = w ^ 5;
}
}
else {
w = w ^ 10;
if (xlen == 0) {
w =w ^ 1;
}
else if (xlen%(32) == 0) {
w = w ^ 4;
}
else if (xlen%(32) < 16) {
w = w ^ 1;
}
else if (xlen%(32) == 16) {
w = w ^ 0;
}
else {
w = w ^ 5;
}
}
if (adlen == 0) { // AD is an empty string
lfsr_gf56(CNT);
}
else while (adlen > 0) {
adlen = ad_encryption(&ad,s,k,adlen,CNT,40,&l_skinny_ctrl);
}
if ((w & 8) == 0) {
xlen = ad2msg_encryption (&mauth,CNT,s,k,44,xlen,&l_skinny_ctrl);
}
else if (clen == 0) {
lfsr_gf56(CNT);
}
while (xlen > 0) {
xlen = ad_encryption(&mauth,s,k,xlen,CNT,44,&l_skinny_ctrl);
}
nonce_encryption(N,CNT,s,k,w,&l_skinny_ctrl);
// Tag generation
g8A(s, T);
l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc1_1;
reset_lfsr_gf56(CNT);
p1 = T;
p2 = (unsigned char*)&c[clen - 16];
p1[0] = p2[0];
p1[1] = p2[1];
p1[2] = p2[2];
p1[3] = p2[3];
p1[4] = p2[4];
p1[5] = p2[5];
p1[6] = p2[6];
p1[7] = p2[7];
p1[8] = p2[8];
p1[9] = p2[9];
p1[10] = p2[10];
p1[11] = p2[11];
p1[12] = p2[12];
p1[13] = p2[13];
p1[14] = p2[14];
p1[15] = p2[15];
#ifdef ___ENABLE_DWORD_CAST
*(uint64_t*)(&s[0]) = *(uint64_t*)(&T[0]);
*(uint64_t*)(&s[8]) = *(uint64_t*)(&T[8]);
#else
*(uint32_t*)(&s[0]) = *(uint32_t*)(&T[0]);
*(uint32_t*)(&s[4]) = *(uint32_t*)(&T[4]);
*(uint32_t*)(&s[8]) = *(uint32_t*)(&T[8]);
*(uint32_t*)(&s[12]) = *(uint32_t*)(&T[12]);
#endif
clen = clen - 16;
*mlen = clen;
if (clen > 0) {
nonce_encryption(N,CNT,s,k,36,&l_skinny_ctrl);
l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc1_1;
while (clen > 16) {
clen = msg_decryption(&m,&c,N,CNT,s,k,36,clen,&l_skinny_ctrl);
}
irho_ud16(m, c, s, clen);
c = c + clen;
m = m + clen;
}
for (int i = 0; i < 16; i++) {
if (T[i] != (*(c+i))) {
return -1;
}
}
return 0;
}
#define ___SKINNY_LOOP
#define ___NUM_OF_ROUNDS_56
#if (defined(__riscv_xlen) && (__riscv_xlen == 64))
#define ___ENABLE_DWORD_CAST
#endif
#include <stdint.h>
/*
 * Working context shared by the skinny_128_384_enc* entry points.
 *
 * roundKeys is used by the visible code as follows:
 *   bytes 0..7    TK1 input words loaded from CNT
 *   bytes 8..63   permuted copies of TK1 written by PERMUTATION_TK1 in Encrypt
 *                 (before Encrypt runs, bytes 16..31 hold the T input read by
 *                 the TK2 schedule and bytes 32..47 hold the K input read by
 *                 the TK3 schedule)
 *   bytes 64..    one 8-byte TK2^TK3^AC word per round, consumed by Encrypt
 *   bytes 384.. (40 rounds) or 512.. (56 rounds)
 *                 one 8-byte TK3^AC word per round
 */
typedef struct ___skinny_ctrl {
#ifdef ___NUM_OF_ROUNDS_56
unsigned char roundKeys[960]; // number of rounds : 56
#else
unsigned char roundKeys[704]; // number of rounds : 40
#endif
// Encryption variant to invoke; skinny_128_384_enc123_12 replaces it with
// skinny_128_384_enc12_12 once it has run.
void (*func_skinny_128_384_enc)(unsigned char*, struct ___skinny_ctrl*, unsigned char* CNT, unsigned char* T, const unsigned char* K);
} skinny_ctrl;
// Encryption entry points; all share the signature of
// skinny_ctrl.func_skinny_128_384_enc.
//   enc123_12 : loads CNT, T and K, runs the TK3 and TK2 schedules, encrypts.
//   enc12_12  : loads CNT and T, runs only the TK2 schedule, encrypts.
//   enc1_1    : loads CNT only and encrypts with the stored round keys.
extern void skinny_128_384_enc123_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K);
extern void skinny_128_384_enc12_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K);
extern void skinny_128_384_enc1_1 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K);
/*
 * pack_word(x0, x1, x2, x3, w)
 *
 * Assembles four byte values into one 32-bit word and assigns it to w:
 * x0 lands in bits 0-7, x1 in bits 8-15, x2 in bits 16-23, x3 in bits 24-31.
 *
 * Every operand is widened to uint32_t before it is shifted.  Without the
 * cast an unsigned char operand is promoted to (signed) int, and shifting a
 * value >= 0x80 left by 24 overflows int, which is undefined behaviour.
 *
 * The expansion keeps its trailing ';' so existing call sites are unaffected.
 */
#define pack_word(x0, x1, x2, x3, w) \
    w = ((uint32_t)(x3) << 24) ^ \
        ((uint32_t)(x2) << 16) ^ \
        ((uint32_t)(x1) << 8) ^ \
        (uint32_t)(x0);
/*
 * unpack_word(x0, x1, x2, x3, w)
 *
 * Splits w into four byte values: x0 receives bits 0-7, x1 bits 8-15,
 * x2 bits 16-23 and x3 whatever remains after shifting w right by 24.
 * Expands to four separate statements (not wrapped in a block), so it must
 * not be used as the unbraced body of an if/else/loop.
 */
#define unpack_word(x0, x1, x2, x3, w) \
x0 = ((w) & 0xff); \
x1 = (((w) >> 8) & 0xff); \
x2 = (((w) >> 16) & 0xff); \
x3 = ((w) >> 24);
/*
 * PERMUTATION()
 *
 * Applies a fixed byte permutation to 8 bytes of tweakey state held in the
 * caller's locals.  Writing the bytes as (7 6 5 4 3 2 1 0), the result is
 * (5 7 2 3 6 0 4 1), i.e. new byte 0 = old byte 1, new byte 1 = old byte 4, ...
 *
 * 64-bit variant: operates on `dw`, using `dt0` and `dt1` as scratch.
 * 32-bit variant: operates on `w0` (bytes 0-3) and `w1` (bytes 4-7), using
 *                 `t0`, `t1` and `t2` as scratch.
 * The caller must declare those locals with the matching unsigned type.
 */
#ifdef ___ENABLE_DWORD_CAST
#define PERMUTATION() \
/* permutation */ \
\
/* 7 6 5 4 3 2 1 0 */ \
/* 5 7 2 3 6 0 4 1 */ \
\
/* dw (7 6 5 4 3 2 1 0) */ \
\
/* dw (5 7 2 3 6 0 4 1) */ \
\
dt0 = dw >> 24; /* - - - 7 6 5 4 3 */ \
dt0 = dt0 & 0x00000000ff00ff00; /* - - - - 6 - 4 - */ \
\
dt1 = dw << 16; /* 5 4 3 2 1 0 - - */ \
dt1 = dt1 & 0xff00000000ff0000; /* 5 - - - - 0 - - */ \
dt0 = dt0 ^ dt1; /* 5 - - - 6 0 4 - */ \
\
dt1 = dw >> 8; /* - 7 6 5 4 3 2 1 */ \
dt1 = dt1 & 0x00ff0000000000ff; /* - 7 - - - - - 1 */ \
dt0 = dt0 ^ dt1; /* 5 7 - - 6 0 4 1 */ \
\
dt1 = dw << 8; /* 6 5 4 3 2 1 0 - */ \
dt1 = dt1 & 0x000000ff00000000; /* - - - 3 - - - - */ \
dt0 = dt0 ^ dt1; /* 5 7 - 3 6 0 4 1 */ \
\
dt1 = dw << 24; /* 4 3 2 1 0 - - - */ \
dw = dt1 & 0x0000ff0000000000; /* - - 2 - - - - - */ \
dw = dw ^ dt0; /* 5 7 2 3 6 0 4 1 */
#else
#define PERMUTATION() \
/* permutation */ \
\
/* 7 6 5 4 3 2 1 0 */ \
/* 5 7 2 3 6 0 4 1 */ \
\
/* w0 (3 2 1 0) */ \
/* w1 (7 6 5 4) */ \
\
/* w0 (6 0 4 1) */ \
/* w1 (5 7 2 3) */ \
\
t0 = w1 << 8; /* 6 5 4 - */ \
t0 = t0 & 0xff00ff00; /* 6 - 4 - */ \
\
t1 = w1 << 16; /* 5 4 - - */ \
t1 = t1 & 0xff000000; /* 5 - - - */ \
\
t2 = w1 & 0xff000000; /* 7 - - - */ \
t2 = t2 >> 8; /* - 7 - - */ \
t1 = t1 ^ t2; /* 5 7 - - */ \
\
t2 = w0 & 0xff000000; /* 3 - - - */ \
t2 = t2 >> 24; /* - - - 3 */ \
t1 = t1 ^ t2; /* 5 7 - 3 */ \
\
w1 = w0 >> 8; /* - 3 2 1 */ \
w1 = w1 & 0x0000ff00; /* - - 2 - */ \
w1 = w1 ^ t1; /* 5 7 2 3 */ \
\
t2 = w0 & 0x0000ff00; /* - - 1 - */ \
t2 = t2 >> 8; /* - - - 1 */ \
t0 = t0 ^ t2; /* 6 - 4 1 */ \
\
w0 = w0 << 16; /* 1 0 - - */ \
w0 = w0 & 0x00ff0000; /* - 0 - - */ \
w0 = w0 ^ t0; /* 6 0 4 1 */
#endif
/******************************************************************************
* Copyright (c) 2020, NEC Corporation.
*
* THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND.
*
*****************************************************************************/
/*
* SKINNY-128-384
*
* load * AC(c0 c1) ^ TK3
* calc AC(c0 c1) ^ TK2 -> store
* ART(TK2)
*
* number of rounds : 40 or 56
*/
#include "skinny.h"
#ifdef ___ENABLE_DWORD_CAST
/*
 * PERMUTATION_TK2() -- 64-bit variant.
 *
 * One TK2 schedule step on the caller's `dw`: byte-permute it with
 * PERMUTATION(), apply the TK2 LFSR to every byte, XOR the result with the
 * word at *tk3 and store it to *tk2.  Both pointers then advance by two
 * uint64_t (16 bytes), i.e. to the slot two rounds later.
 * Requires locals dw, dt0, dt1 (uint64_t) and tk2, tk3 (uint64_t *).
 */
#define PERMUTATION_TK2() \
\
/* permutation */ \
\
PERMUTATION() \
\
/* LFSR(for TK2) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x6 x5 x4 x3 x2 x1 x0 x7^x5) */ \
dw = ((dw << 1) & 0xfefefefefefefefe) ^ \
(((dw >> 7) ^ (dw >> 5)) & 0x0101010101010101); \
\
/* Load TK3 */ \
/* TK2^TK3^AC(c0 c1) */ \
/* store */ \
*tk2 = dw ^ *tk3; \
tk2 += 2; \
tk3 += 2;
#ifndef ___SKINNY_LOOP
void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys)
{
uint64_t* tk2; // used in MACRO
uint64_t* tk3; // used in MACRO
uint64_t dt0; // used in MACRO
uint64_t dt1; // used in MACRO
uint64_t dw;
// odd
// load master key
// load master key
dw = *(uint64_t*)&roundKeys[16];
tk2 = (uint64_t*)&roundKeys[64];
#ifndef ___NUM_OF_ROUNDS_56
tk3 = (uint64_t*)&roundKeys[384];
#else
tk3 = (uint64_t*)&roundKeys[512];
#endif
// 1st round
*tk2 = dw ^ *tk3;
tk2 += 2;
tk3 += 2;
// 3rd,5th, ... ,37th,39th round
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
#ifdef ___NUM_OF_ROUNDS_56
// 41th,43th, ... ,51th,53th round
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
#endif
// even
// load master key
dw = *(uint64_t*)&roundKeys[24];
tk2 = (uint64_t*)&roundKeys[72];
#ifndef ___NUM_OF_ROUNDS_56
tk3 = (uint64_t*)&roundKeys[392];
#else
tk3 = (uint64_t*)&roundKeys[520];
#endif
// 2nd,4th, ... ,54th,56th round
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
#ifdef ___NUM_OF_ROUNDS_56
// 42nd,44th, ... ,54th,56th round
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
#endif
}
#else /* ___SKINNY_LOOP */
/*
 * TK2 key schedule, loop form, 64-bit variant.
 *
 * For the odd rounds (TK2 half at roundKeys[16]) and then the even rounds
 * (TK2 half at roundKeys[24]) this writes one 8-byte TK2^TK3^AC word per
 * round into the interleaved table starting at roundKeys[64].  The TK3^AC
 * words are read from the table at TK3_OFFSET, which must already be filled.
 */
void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys)
{
#ifndef ___NUM_OF_ROUNDS_56
    enum { TK3_OFFSET = 384, ODD_STEPS = 19, EVEN_STEPS = 20 };
#else
    enum { TK3_OFFSET = 512, ODD_STEPS = 27, EVEN_STEPS = 28 };
#endif
    /* The names below are fixed: PERMUTATION_TK2 refers to them directly. */
    uint64_t *tk2;
    uint64_t *tk3;
    uint64_t dt0;
    uint64_t dt1;
    uint64_t dw;
    int left;

    /* ---- odd rounds ---- */
    dw = *(uint64_t*)&roundKeys[16];
    tk2 = (uint64_t*)&roundKeys[64];
    tk3 = (uint64_t*)&roundKeys[TK3_OFFSET];

    /* Round 1 uses the TK2 half as loaded, without a schedule step. */
    tk2[0] = dw ^ tk3[0];
    tk2 += 2;
    tk3 += 2;

    /* Rounds 3, 5, ... */
    for (left = ODD_STEPS; left > 0; left--) {
        PERMUTATION_TK2();
    }

    /* ---- even rounds ---- */
    dw = *(uint64_t*)&roundKeys[24];
    tk2 = (uint64_t*)&roundKeys[72];
    tk3 = (uint64_t*)&roundKeys[TK3_OFFSET + 8];

    /* Rounds 2, 4, ... : every one of them takes a schedule step. */
    for (left = EVEN_STEPS; left > 0; left--) {
        PERMUTATION_TK2();
    }
}
#endif /* ___SKINNY_LOOP */
#else /* ___ENABLE_DWORD_CAST */
/*
 * PERMUTATION_TK2() -- 32-bit variant.
 *
 * One TK2 schedule step on the caller's `w0`/`w1`: byte-permute them with
 * PERMUTATION(), apply the TK2 LFSR to every byte, then store
 * w0 ^ tk3[0] and w1 ^ tk3[1] through tk2.  After the two stores both
 * pointers are advanced by a further two uint32_t, so each step moves them
 * 16 bytes in total (to the slot two rounds later).
 * Requires locals w0, w1, t0, t1, t2 (uint32_t) and tk2, tk3 (uint32_t *).
 */
#define PERMUTATION_TK2() \
\
/* permutation */ \
\
PERMUTATION() \
\
/* LFSR(for TK2) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x6 x5 x4 x3 x2 x1 x0 x7^x5) */ \
w0 = ((w0 << 1) & 0xfefefefe) ^ \
(((w0 >> 7) ^ (w0 >> 5)) & 0x01010101); \
w1 = ((w1 << 1) & 0xfefefefe) ^ \
(((w1 >> 7) ^ (w1 >> 5)) & 0x01010101); \
\
/* Load TK3 */ \
/* TK2^TK3^AC(c0 c1) */ \
/* store */ \
*tk2++ = w0 ^ *tk3++; \
*tk2++ = w1 ^ *tk3++; \
tk2 += 2; \
tk3 += 2;
#ifndef ___SKINNY_LOOP
void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys)
{
uint32_t* tk2; // used in MACRO
uint32_t* tk3; // used in MACRO
uint32_t t0; // used in MACRO
uint32_t t1; // used in MACRO
uint32_t t2; // used in MACRO
uint32_t w0;
uint32_t w1;
// odd
// load master key
w0 = *(uint32_t*)&roundKeys[16];
w1 = *(uint32_t*)&roundKeys[20];
tk2 = (uint32_t*)&roundKeys[64];
#ifndef ___NUM_OF_ROUNDS_56
tk3 = (uint32_t*)&roundKeys[384];
#else
tk3 = (uint32_t*)&roundKeys[512];
#endif
// 1st round
*tk2++ = w0 ^ *tk3++;
*tk2++ = w1 ^ *tk3++;
tk2 += 2;
tk3 += 2;
// 3rd,5th, ... ,37th,39th round
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
#ifdef ___NUM_OF_ROUNDS_56
// 41th,43th, ... ,51th,53th round
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
#endif
// even
// load master key
w0 = *(uint32_t*)&roundKeys[24];
w1 = *(uint32_t*)&roundKeys[28];
tk2 = (uint32_t*)&roundKeys[72];
#ifndef ___NUM_OF_ROUNDS_56
tk3 = (uint32_t*)&roundKeys[392];
#else
tk3 = (uint32_t*)&roundKeys[520];
#endif
// 2nd,4th, ... ,54th,56th round
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
#ifdef ___NUM_OF_ROUNDS_56
// 42nd,44th, ... ,54th,56th round
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
#endif
}
#else /* ___SKINNY_LOOP */
/*
 * TK2 key schedule, loop form, 32-bit variant.
 *
 * For the odd rounds (TK2 half at roundKeys[16..23]) and then the even
 * rounds (TK2 half at roundKeys[24..31]) this writes 8 bytes of TK2^TK3^AC
 * per round into the interleaved table starting at roundKeys[64].  The
 * TK3^AC words are read from the table at TK3_OFFSET, which must already be
 * filled.
 */
void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys)
{
#ifndef ___NUM_OF_ROUNDS_56
    enum { TK3_OFFSET = 384, ODD_STEPS = 19, EVEN_STEPS = 20 };
#else
    enum { TK3_OFFSET = 512, ODD_STEPS = 27, EVEN_STEPS = 28 };
#endif
    /* The names below are fixed: PERMUTATION_TK2 refers to them directly. */
    uint32_t *tk2;
    uint32_t *tk3;
    uint32_t t0;
    uint32_t t1;
    uint32_t t2;
    uint32_t w0;
    uint32_t w1;
    int left;

    /* ---- odd rounds ---- */
    w0 = *(uint32_t*)&roundKeys[16];
    w1 = *(uint32_t*)&roundKeys[20];
    tk2 = (uint32_t*)&roundKeys[64];
    tk3 = (uint32_t*)&roundKeys[TK3_OFFSET];

    /* Round 1 uses the TK2 half as loaded, without a schedule step. */
    tk2[0] = w0 ^ tk3[0];
    tk2[1] = w1 ^ tk3[1];
    tk2 += 4;
    tk3 += 4;

    /* Rounds 3, 5, ... */
    for (left = ODD_STEPS; left > 0; left--) {
        PERMUTATION_TK2();
    }

    /* ---- even rounds ---- */
    w0 = *(uint32_t*)&roundKeys[24];
    w1 = *(uint32_t*)&roundKeys[28];
    tk2 = (uint32_t*)&roundKeys[72];
    tk3 = (uint32_t*)&roundKeys[TK3_OFFSET + 8];

    /* Rounds 2, 4, ... : every one of them takes a schedule step. */
    for (left = EVEN_STEPS; left > 0; left--) {
        PERMUTATION_TK2();
    }
}
#endif /* ___SKINNY_LOOP */
#endif /* ___ENABLE_DWORD_CAST */
/******************************************************************************
* Copyright (c) 2020, NEC Corporation.
*
* THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND.
*
*****************************************************************************/
/*
* SKINNY-128-384
*
* AC(c0 c1) ^ TK3 -> store
* ART(TK3)
*
* number of rounds : 40 or 56
*/
#include "skinny.h"
#ifdef ___ENABLE_DWORD_CAST
/*
 * PERMUTATION_TK3(c0Val, c1Val) -- 64-bit variant.
 *
 * One TK3 schedule step on the caller's `dw`: byte-permute it with
 * PERMUTATION(), apply the TK3 LFSR to every byte, XOR in the round constant
 * (c0Val into byte 0, c1Val into byte 5 via the << 40) and store the result
 * to *tk3.  tk3 then advances by two uint64_t (16 bytes).
 * Requires locals dw, dt0, dt1 (uint64_t) and tk3 (uint64_t *).
 */
#define PERMUTATION_TK3(c0Val, c1Val) \
\
/* permutation */ \
\
PERMUTATION() \
\
/* LFSR(for TK3) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x0^x6 x7 x6 x5 x4 x3 x2 x1) */ \
dw = ((dw >> 1) & 0x7f7f7f7f7f7f7f7f) ^ \
(((dw << 7) ^ (dw << 1)) & 0x8080808080808080); \
\
/* K3^AC(c0 c1) */ \
/* store */ \
dt0 = dw ^ c0Val; \
*tk3 = dt0 ^ ((uint64_t)c1Val << 40); \
tk3 += 2;
#ifndef ___SKINNY_LOOP
/*
 * TK3 key schedule, fully unrolled, 64-bit variant.
 *
 * Reads the two TK3 halves from roundKeys[32] (odd rounds) and roundKeys[40]
 * (even rounds), steps each with PERMUTATION_TK3 and writes one 8-byte
 * TK3^AC word per round to roundKeys[384..] (40 rounds) or roundKeys[512..]
 * (56 rounds), odd and even rounds interleaved.  The round constants are
 * given as literal (c0, c1) pairs; the 1st round only XORs in 0x01.
 */
void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys)
{
uint64_t *tk3;
uint64_t dt0; // used in MACRO
uint64_t dt1; // used in MACRO
uint64_t dw;
// odd
// load master key
dw = *(uint64_t*)&roundKeys[32];
#ifndef ___NUM_OF_ROUNDS_56
tk3 = (uint64_t*)&roundKeys[384];
#else
tk3 = (uint64_t*)&roundKeys[512];
#endif
// 1st round
*tk3++ = dw ^ 0x01;
tk3 += 1;
// 3rd,5th, ... ,37th,39th round
PERMUTATION_TK3(0x7, 0x0);
PERMUTATION_TK3(0xf, 0x1);
PERMUTATION_TK3(0xd, 0x3);
PERMUTATION_TK3(0x7, 0x3);
PERMUTATION_TK3(0xe, 0x1);
PERMUTATION_TK3(0x9, 0x3);
PERMUTATION_TK3(0x7, 0x2);
PERMUTATION_TK3(0xd, 0x1);
PERMUTATION_TK3(0x5, 0x3);
PERMUTATION_TK3(0x6, 0x1);
PERMUTATION_TK3(0x8, 0x1);
PERMUTATION_TK3(0x1, 0x2);
PERMUTATION_TK3(0x5, 0x0);
PERMUTATION_TK3(0x7, 0x1);
PERMUTATION_TK3(0xc, 0x1);
PERMUTATION_TK3(0x1, 0x3);
PERMUTATION_TK3(0x6, 0x0);
PERMUTATION_TK3(0xb, 0x1);
PERMUTATION_TK3(0xd, 0x2);
#ifdef ___NUM_OF_ROUNDS_56
// 41st,43rd, ... ,53rd,55th round
PERMUTATION_TK3(0x4, 0x3);
PERMUTATION_TK3(0x2, 0x1);
PERMUTATION_TK3(0x8, 0x0);
PERMUTATION_TK3(0x2, 0x2);
PERMUTATION_TK3(0x9, 0x0);
PERMUTATION_TK3(0x6, 0x2);
PERMUTATION_TK3(0x9, 0x1);
PERMUTATION_TK3(0x5, 0x2);
#endif
// even
// load master key
dw = *(uint64_t*)&roundKeys[40];
#ifndef ___NUM_OF_ROUNDS_56
tk3 = (uint64_t*)&roundKeys[392];
#else
tk3 = (uint64_t*)&roundKeys[520];
#endif
// 2nd,4th, ... ,38th,40th round
PERMUTATION_TK3(0x3, 0x0);
PERMUTATION_TK3(0xf, 0x0);
PERMUTATION_TK3(0xe, 0x3);
PERMUTATION_TK3(0xb, 0x3);
PERMUTATION_TK3(0xf, 0x2);
PERMUTATION_TK3(0xc, 0x3);
PERMUTATION_TK3(0x3, 0x3);
PERMUTATION_TK3(0xe, 0x0);
PERMUTATION_TK3(0xa, 0x3);
PERMUTATION_TK3(0xb, 0x2);
PERMUTATION_TK3(0xc, 0x2);
PERMUTATION_TK3(0x0, 0x3);
PERMUTATION_TK3(0x2, 0x0);
PERMUTATION_TK3(0xb, 0x0);
PERMUTATION_TK3(0xe, 0x2);
PERMUTATION_TK3(0x8, 0x3);
PERMUTATION_TK3(0x3, 0x2);
PERMUTATION_TK3(0xd, 0x0);
PERMUTATION_TK3(0x6, 0x3);
PERMUTATION_TK3(0xa, 0x1);
#ifdef ___NUM_OF_ROUNDS_56
// 42nd,44th, ... ,54th,56th round
PERMUTATION_TK3(0x9, 0x2);
PERMUTATION_TK3(0x4, 0x2);
PERMUTATION_TK3(0x1, 0x1);
PERMUTATION_TK3(0x4, 0x0);
PERMUTATION_TK3(0x3, 0x1);
PERMUTATION_TK3(0xc, 0x0);
PERMUTATION_TK3(0x2, 0x3);
PERMUTATION_TK3(0xa, 0x0);
#endif
}
#else /* ___SKINNY_LOOP */
/*
 * TK3 key schedule, loop form, 64-bit variant.
 *
 * roundKeys : context buffer; the TK3 halves are read from offsets 32 (odd
 *             rounds) and 40 (even rounds) and one 8-byte TK3^AC word per
 *             round is written from TK3_OFFSET onwards.
 * pRC       : round-constant table holding a (c0, c1) byte pair per round,
 *             in round order.
 */
void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys, unsigned char *pRC)
{
#ifndef ___NUM_OF_ROUNDS_56
    enum { TK3_OFFSET = 384, ODD_STEPS = 19, EVEN_STEPS = 20 };
#else
    enum { TK3_OFFSET = 512, ODD_STEPS = 27, EVEN_STEPS = 28 };
#endif
    /* tk3, dt0, dt1 and dw are referenced by name inside PERMUTATION_TK3. */
    uint64_t *tk3;
    uint64_t dt0;
    uint64_t dt1;
    uint64_t dw;
    uint64_t c0;
    uint64_t c1;
    const unsigned char *rc;
    int left;

    /* ---- odd rounds ---- */
    dw = *(uint64_t*)&roundKeys[32];
    tk3 = (uint64_t*)&roundKeys[TK3_OFFSET];

    /* Round 1: no schedule step, only its constant (c0 = 0x01, c1 = 0). */
    tk3[0] = dw ^ 0x01;
    tk3 += 2;

    /* Rounds 3, 5, ... : constants sit 4 bytes apart, starting at pRC[4]. */
    rc = pRC + 4;
    for (left = ODD_STEPS; left > 0; left--) {
        c0 = rc[0];
        c1 = rc[1];
        rc += 4;
        PERMUTATION_TK3(c0, c1);
    }

    /* ---- even rounds ---- */
    dw = *(uint64_t*)&roundKeys[40];
    tk3 = (uint64_t*)&roundKeys[TK3_OFFSET + 8];

    /* Rounds 2, 4, ... : constants start at pRC[2]. */
    rc = pRC + 2;
    for (left = EVEN_STEPS; left > 0; left--) {
        c0 = rc[0];
        c1 = rc[1];
        rc += 4;
        PERMUTATION_TK3(c0, c1);
    }
}
#endif /* ___SKINNY_LOOP */
#else /* ___ENABLE_DWORD_CAST */
/*
 * PERMUTATION_TK3(c0Val, c1Val) -- 32-bit variant.
 *
 * One TK3 schedule step on the caller's `w0`/`w1`: byte-permute them with
 * PERMUTATION(), apply the TK3 LFSR to every byte, then store w0 ^ c0Val and
 * w1 ^ (c1Val << 8) through tk3 (so c0 lands in byte 0 and c1 in byte 5 of
 * the 8-byte round word).  tk3 advances 16 bytes per step in total.
 * Requires locals w0, w1, t0, t1, t2 (uint32_t) and tk3 (uint32_t *).
 */
#define PERMUTATION_TK3(c0Val, c1Val) \
\
/* permutation */ \
\
PERMUTATION() \
\
/* LFSR(for TK3) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x0^x6 x7 x6 x5 x4 x3 x2 x1) */ \
w0 = ((w0 >> 1) & 0x7f7f7f7f) ^ \
(((w0 << 7) ^ (w0 << 1)) & 0x80808080); \
w1 = ((w1 >> 1) & 0x7f7f7f7f) ^ \
(((w1 << 7) ^ (w1 << 1)) & 0x80808080); \
\
/* K3^AC(c0 c1) */ \
/* store */ \
*tk3++ = w0 ^ c0Val; \
*tk3++ = w1 ^ ((uint32_t)c1Val << 8); \
tk3 += 2;
#ifndef ___SKINNY_LOOP
/*
 * TK3 key schedule, fully unrolled, 32-bit variant.
 *
 * Reads the two TK3 halves from roundKeys[32..39] (odd rounds) and
 * roundKeys[40..47] (even rounds), steps each with PERMUTATION_TK3 and
 * writes 8 bytes of TK3^AC per round to roundKeys[384..] (40 rounds) or
 * roundKeys[512..] (56 rounds), odd and even rounds interleaved.  The round
 * constants are given as literal (c0, c1) pairs; the 1st round only XORs
 * 0x01 into the low word.
 */
void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys)
{
uint32_t *tk3;
uint32_t t0; // used in MACRO
uint32_t t1; // used in MACRO
uint32_t t2; // used in MACRO
uint32_t w0;
uint32_t w1;
// odd
// load master key
w0 = *(uint32_t*)&roundKeys[32];
w1 = *(uint32_t*)&roundKeys[36];
#ifndef ___NUM_OF_ROUNDS_56
tk3 = (uint32_t*)&roundKeys[384];
#else
tk3 = (uint32_t*)&roundKeys[512];
#endif
// 1st round
*tk3++ = w0 ^ 0x01;
*tk3++ = w1;
tk3 += 2;
// 3rd,5th, ... ,37th,39th round
PERMUTATION_TK3(0x7, 0x0);
PERMUTATION_TK3(0xf, 0x1);
PERMUTATION_TK3(0xd, 0x3);
PERMUTATION_TK3(0x7, 0x3);
PERMUTATION_TK3(0xe, 0x1);
PERMUTATION_TK3(0x9, 0x3);
PERMUTATION_TK3(0x7, 0x2);
PERMUTATION_TK3(0xd, 0x1);
PERMUTATION_TK3(0x5, 0x3);
PERMUTATION_TK3(0x6, 0x1);
PERMUTATION_TK3(0x8, 0x1);
PERMUTATION_TK3(0x1, 0x2);
PERMUTATION_TK3(0x5, 0x0);
PERMUTATION_TK3(0x7, 0x1);
PERMUTATION_TK3(0xc, 0x1);
PERMUTATION_TK3(0x1, 0x3);
PERMUTATION_TK3(0x6, 0x0);
PERMUTATION_TK3(0xb, 0x1);
PERMUTATION_TK3(0xd, 0x2);
#ifdef ___NUM_OF_ROUNDS_56
// 41st,43rd, ... ,53rd,55th round
PERMUTATION_TK3(0x4, 0x3);
PERMUTATION_TK3(0x2, 0x1);
PERMUTATION_TK3(0x8, 0x0);
PERMUTATION_TK3(0x2, 0x2);
PERMUTATION_TK3(0x9, 0x0);
PERMUTATION_TK3(0x6, 0x2);
PERMUTATION_TK3(0x9, 0x1);
PERMUTATION_TK3(0x5, 0x2);
#endif
// even
// load master key
w0 = *(uint32_t*)&roundKeys[40];
w1 = *(uint32_t*)&roundKeys[44];
#ifndef ___NUM_OF_ROUNDS_56
tk3 = (uint32_t*)&roundKeys[392];
#else
tk3 = (uint32_t*)&roundKeys[520];
#endif
// 2nd,4th, ... ,38th,40th round
PERMUTATION_TK3(0x3, 0x0);
PERMUTATION_TK3(0xf, 0x0);
PERMUTATION_TK3(0xe, 0x3);
PERMUTATION_TK3(0xb, 0x3);
PERMUTATION_TK3(0xf, 0x2);
PERMUTATION_TK3(0xc, 0x3);
PERMUTATION_TK3(0x3, 0x3);
PERMUTATION_TK3(0xe, 0x0);
PERMUTATION_TK3(0xa, 0x3);
PERMUTATION_TK3(0xb, 0x2);
PERMUTATION_TK3(0xc, 0x2);
PERMUTATION_TK3(0x0, 0x3);
PERMUTATION_TK3(0x2, 0x0);
PERMUTATION_TK3(0xb, 0x0);
PERMUTATION_TK3(0xe, 0x2);
PERMUTATION_TK3(0x8, 0x3);
PERMUTATION_TK3(0x3, 0x2);
PERMUTATION_TK3(0xd, 0x0);
PERMUTATION_TK3(0x6, 0x3);
PERMUTATION_TK3(0xa, 0x1);
#ifdef ___NUM_OF_ROUNDS_56
// 42nd,44th, ... ,54th,56th round
PERMUTATION_TK3(0x9, 0x2);
PERMUTATION_TK3(0x4, 0x2);
PERMUTATION_TK3(0x1, 0x1);
PERMUTATION_TK3(0x4, 0x0);
PERMUTATION_TK3(0x3, 0x1);
PERMUTATION_TK3(0xc, 0x0);
PERMUTATION_TK3(0x2, 0x3);
PERMUTATION_TK3(0xa, 0x0);
#endif
}
#else /* ___SKINNY_LOOP */
/*
 * TK3 key schedule, loop form, 32-bit variant.
 *
 * roundKeys : context buffer; the TK3 halves are read from offsets 32..39
 *             (odd rounds) and 40..47 (even rounds) and 8 bytes of TK3^AC
 *             per round are written from TK3_OFFSET onwards.
 * pRC       : round-constant table holding a (c0, c1) byte pair per round,
 *             in round order.
 */
void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys, unsigned char *pRC)
{
#ifndef ___NUM_OF_ROUNDS_56
    enum { TK3_OFFSET = 384, ODD_STEPS = 19, EVEN_STEPS = 20 };
#else
    enum { TK3_OFFSET = 512, ODD_STEPS = 27, EVEN_STEPS = 28 };
#endif
    /* tk3, t0..t2, w0 and w1 are referenced by name inside PERMUTATION_TK3. */
    uint32_t *tk3;
    uint32_t t0;
    uint32_t t1;
    uint32_t t2;
    uint32_t w0;
    uint32_t w1;
    uint32_t c0;
    uint32_t c1;
    const unsigned char *rc;
    int left;

    /* ---- odd rounds ---- */
    w0 = *(uint32_t*)&roundKeys[32];
    w1 = *(uint32_t*)&roundKeys[36];
    tk3 = (uint32_t*)&roundKeys[TK3_OFFSET];

    /* Round 1: no schedule step, only its constant (c0 = 0x01, c1 = 0). */
    tk3[0] = w0 ^ 0x01;
    tk3[1] = w1;
    tk3 += 4;

    /* Rounds 3, 5, ... : constants sit 4 bytes apart, starting at pRC[4]. */
    rc = pRC + 4;
    for (left = ODD_STEPS; left > 0; left--) {
        c0 = rc[0];
        c1 = rc[1];
        rc += 4;
        PERMUTATION_TK3(c0, c1);
    }

    /* ---- even rounds ---- */
    w0 = *(uint32_t*)&roundKeys[40];
    w1 = *(uint32_t*)&roundKeys[44];
    tk3 = (uint32_t*)&roundKeys[TK3_OFFSET + 8];

    /* Rounds 2, 4, ... : constants start at pRC[2]. */
    rc = pRC + 2;
    for (left = EVEN_STEPS; left > 0; left--) {
        c0 = rc[0];
        c1 = rc[1];
        rc += 4;
        PERMUTATION_TK3(c0, c1);
    }
}
#endif /* ___SKINNY_LOOP */
#endif /* ___ENABLE_DWORD_CAST */
/******************************************************************************
* Copyright (c) 2020, NEC Corporation.
*
* THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND.
*
*****************************************************************************/
/*
* SKINNY-128-384
*
* ART(TK1) -> store
* load AC(c0 c1) ^ TK3 ^ TK2
* load TK1
* calc AC(c0 c1) ^ TK3 ^ TK2 ^ TK1 -> use at (AC->ART)
* SC->SR->(AC->ART)->MC
*
* number of rounds : 40 or 56
*/
#include "skinny.h"
/*
 * S-BOX
 *
 * 256-entry byte substitution table.  The skinny_128_384_enc* functions pass
 * it to Encrypt as `sbox`, where the SBOX_* macros index it with a state byte.
 */
unsigned char SBOX[]
= {
// Original
0x65, 0x4c, 0x6a, 0x42, 0x4b, 0x63, 0x43, 0x6b, 0x55, 0x75, 0x5a, 0x7a, 0x53, 0x73, 0x5b, 0x7b,
0x35, 0x8c, 0x3a, 0x81, 0x89, 0x33, 0x80, 0x3b, 0x95, 0x25, 0x98, 0x2a, 0x90, 0x23, 0x99, 0x2b,
0xe5, 0xcc, 0xe8, 0xc1, 0xc9, 0xe0, 0xc0, 0xe9, 0xd5, 0xf5, 0xd8, 0xf8, 0xd0, 0xf0, 0xd9, 0xf9,
0xa5, 0x1c, 0xa8, 0x12, 0x1b, 0xa0, 0x13, 0xa9, 0x05, 0xb5, 0x0a, 0xb8, 0x03, 0xb0, 0x0b, 0xb9,
0x32, 0x88, 0x3c, 0x85, 0x8d, 0x34, 0x84, 0x3d, 0x91, 0x22, 0x9c, 0x2c, 0x94, 0x24, 0x9d, 0x2d,
0x62, 0x4a, 0x6c, 0x45, 0x4d, 0x64, 0x44, 0x6d, 0x52, 0x72, 0x5c, 0x7c, 0x54, 0x74, 0x5d, 0x7d,
0xa1, 0x1a, 0xac, 0x15, 0x1d, 0xa4, 0x14, 0xad, 0x02, 0xb1, 0x0c, 0xbc, 0x04, 0xb4, 0x0d, 0xbd,
0xe1, 0xc8, 0xec, 0xc5, 0xcd, 0xe4, 0xc4, 0xed, 0xd1, 0xf1, 0xdc, 0xfc, 0xd4, 0xf4, 0xdd, 0xfd,
0x36, 0x8e, 0x38, 0x82, 0x8b, 0x30, 0x83, 0x39, 0x96, 0x26, 0x9a, 0x28, 0x93, 0x20, 0x9b, 0x29,
0x66, 0x4e, 0x68, 0x41, 0x49, 0x60, 0x40, 0x69, 0x56, 0x76, 0x58, 0x78, 0x50, 0x70, 0x59, 0x79,
0xa6, 0x1e, 0xaa, 0x11, 0x19, 0xa3, 0x10, 0xab, 0x06, 0xb6, 0x08, 0xba, 0x00, 0xb3, 0x09, 0xbb,
0xe6, 0xce, 0xea, 0xc2, 0xcb, 0xe3, 0xc3, 0xeb, 0xd6, 0xf6, 0xda, 0xfa, 0xd3, 0xf3, 0xdb, 0xfb,
0x31, 0x8a, 0x3e, 0x86, 0x8f, 0x37, 0x87, 0x3f, 0x92, 0x21, 0x9e, 0x2e, 0x97, 0x27, 0x9f, 0x2f,
0x61, 0x48, 0x6e, 0x46, 0x4f, 0x67, 0x47, 0x6f, 0x51, 0x71, 0x5e, 0x7e, 0x57, 0x77, 0x5f, 0x7f,
0xa2, 0x18, 0xae, 0x16, 0x1f, 0xa7, 0x17, 0xaf, 0x01, 0xb2, 0x0e, 0xbe, 0x07, 0xb7, 0x0f, 0xbf,
0xe2, 0xca, 0xee, 0xc6, 0xcf, 0xe7, 0xc7, 0xef, 0xd2, 0xf2, 0xde, 0xfe, 0xd7, 0xf7, 0xdf, 0xff,
};
/*
 * S-BOX ^ AC(c2)
 *
 * Every entry equals the corresponding SBOX entry XOR 0x02, so the constant
 * c2 is added by the lookup itself.  Encrypt receives it as `sbox2`, and
 * SBOX_16 uses it for its first byte only.
 */
unsigned char SBOX2[]
= { // Original ^ c2(0x02)
0x67, 0x4e, 0x68, 0x40, 0x49, 0x61, 0x41, 0x69, 0x57, 0x77, 0x58, 0x78, 0x51, 0x71, 0x59, 0x79,
0x37, 0x8e, 0x38, 0x83, 0x8b, 0x31, 0x82, 0x39, 0x97, 0x27, 0x9a, 0x28, 0x92, 0x21, 0x9b, 0x29,
0xe7, 0xce, 0xea, 0xc3, 0xcb, 0xe2, 0xc2, 0xeb, 0xd7, 0xf7, 0xda, 0xfa, 0xd2, 0xf2, 0xdb, 0xfb,
0xa7, 0x1e, 0xaa, 0x10, 0x19, 0xa2, 0x11, 0xab, 0x07, 0xb7, 0x08, 0xba, 0x01, 0xb2, 0x09, 0xbb,
0x30, 0x8a, 0x3e, 0x87, 0x8f, 0x36, 0x86, 0x3f, 0x93, 0x20, 0x9e, 0x2e, 0x96, 0x26, 0x9f, 0x2f,
0x60, 0x48, 0x6e, 0x47, 0x4f, 0x66, 0x46, 0x6f, 0x50, 0x70, 0x5e, 0x7e, 0x56, 0x76, 0x5f, 0x7f,
0xa3, 0x18, 0xae, 0x17, 0x1f, 0xa6, 0x16, 0xaf, 0x00, 0xb3, 0x0e, 0xbe, 0x06, 0xb6, 0x0f, 0xbf,
0xe3, 0xca, 0xee, 0xc7, 0xcf, 0xe6, 0xc6, 0xef, 0xd3, 0xf3, 0xde, 0xfe, 0xd6, 0xf6, 0xdf, 0xff,
0x34, 0x8c, 0x3a, 0x80, 0x89, 0x32, 0x81, 0x3b, 0x94, 0x24, 0x98, 0x2a, 0x91, 0x22, 0x99, 0x2b,
0x64, 0x4c, 0x6a, 0x43, 0x4b, 0x62, 0x42, 0x6b, 0x54, 0x74, 0x5a, 0x7a, 0x52, 0x72, 0x5b, 0x7b,
0xa4, 0x1c, 0xa8, 0x13, 0x1b, 0xa1, 0x12, 0xa9, 0x04, 0xb4, 0x0a, 0xb8, 0x02, 0xb1, 0x0b, 0xb9,
0xe4, 0xcc, 0xe8, 0xc0, 0xc9, 0xe1, 0xc1, 0xe9, 0xd4, 0xf4, 0xd8, 0xf8, 0xd1, 0xf1, 0xd9, 0xf9,
0x33, 0x88, 0x3c, 0x84, 0x8d, 0x35, 0x85, 0x3d, 0x90, 0x23, 0x9c, 0x2c, 0x95, 0x25, 0x9d, 0x2d,
0x63, 0x4a, 0x6c, 0x44, 0x4d, 0x65, 0x45, 0x6d, 0x53, 0x73, 0x5c, 0x7c, 0x55, 0x75, 0x5d, 0x7d,
0xa0, 0x1a, 0xac, 0x14, 0x1d, 0xa5, 0x15, 0xad, 0x03, 0xb0, 0x0c, 0xbc, 0x05, 0xb5, 0x0d, 0xbd,
0xe0, 0xc8, 0xec, 0xc4, 0xcd, 0xe5, 0xc5, 0xed, 0xd0, 0xf0, 0xdc, 0xfc, 0xd5, 0xf5, 0xdd, 0xfd,
};
#ifdef ___SKINNY_LOOP
/*
 * Round Constants
 *
 * Two bytes (c0, c1) per round, in round order: round r (1-based) occupies
 * RC[2*(r-1)] and RC[2*(r-1)+1].  The first five rows cover rounds 1-40 and
 * the two rows under ___NUM_OF_ROUNDS_56 add rounds 41-56.  The loop form of
 * RunEncryptionKeyScheduleTK3 walks this table with a stride of 4 bytes,
 * once for the odd rounds and once for the even rounds.
 */
unsigned char RC[]
= {
0x01, 0x00, 0x03, 0x00, 0x07, 0x00, 0x0f, 0x00, 0x0f, 0x01, 0x0e, 0x03, 0x0d, 0x03, 0x0b, 0x03,
0x07, 0x03, 0x0f, 0x02, 0x0e, 0x01, 0x0c, 0x03, 0x09, 0x03, 0x03, 0x03, 0x07, 0x02, 0x0e, 0x00,
0x0d, 0x01, 0x0a, 0x03, 0x05, 0x03, 0x0b, 0x02, 0x06, 0x01, 0x0c, 0x02, 0x08, 0x01, 0x00, 0x03,
0x01, 0x02, 0x02, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x07, 0x01, 0x0e, 0x02, 0x0c, 0x01, 0x08, 0x03,
0x01, 0x03, 0x03, 0x02, 0x06, 0x00, 0x0d, 0x00, 0x0b, 0x01, 0x06, 0x03, 0x0d, 0x02, 0x0a, 0x01,
#ifdef ___NUM_OF_ROUNDS_56
0x04, 0x03, 0x09, 0x02, 0x02, 0x01, 0x04, 0x02, 0x08, 0x00, 0x01, 0x01, 0x02, 0x02, 0x04, 0x00,
0x09, 0x00, 0x03, 0x01, 0x06, 0x02, 0x0c, 0x00, 0x09, 0x01, 0x02, 0x03, 0x05, 0x02, 0x0a, 0x00,
#endif
};
#endif
// Prototypes of the cipher core and key-schedule routines called below.
// RunEncryptionKeyScheduleTK3 takes the round-constant table as a second
// argument only in the ___SKINNY_LOOP build.
extern void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2);
extern void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys);
#ifdef ___SKINNY_LOOP
extern void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys, unsigned char *pRC);
#else
extern void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys);
#endif
/*
 * Encrypts `input` in place after loading all three tweakey parts.
 *
 * CNT is copied into round-key words 0-1 (TK1), T into words 4-7 (TK2 input)
 * and K into words 8-11 (TK3 input).  In each 8-byte half the first four
 * bytes are copied as they are and the last four are stored rotated
 * (byte 7 first, then bytes 4, 5, 6).  The TK3 schedule runs before the TK2
 * schedule because the latter folds the TK3 table into its output.
 *
 * On return the context's function pointer is switched to
 * skinny_128_384_enc12_12.
 */
void skinny_128_384_enc123_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K)
{
    unsigned char *rk = pskinny_ctrl->roundKeys;
    uint32_t *words = (uint32_t*)&rk[0];

    /* TK1 <- CNT */
    words[0] = *(uint32_t*)(&CNT[0]);
    pack_word(CNT[7], CNT[4], CNT[5], CNT[6], words[1]);

    /* TK2 input <- T */
    words[4] = *(uint32_t*)(&T[0]);
    pack_word(T[7], T[4], T[5], T[6], words[5]);
    words[6] = *(uint32_t*)(&T[8]);
    pack_word(T[15], T[12], T[13], T[14], words[7]);

    /* TK3 input <- K */
    words[8] = *(uint32_t*)(&K[0]);
    pack_word(K[7], K[4], K[5], K[6], words[9]);
    words[10] = *(uint32_t*)(&K[8]);
    pack_word(K[15], K[12], K[13], K[14], words[11]);

#ifdef ___SKINNY_LOOP
    RunEncryptionKeyScheduleTK3(rk, RC);
#else
    RunEncryptionKeyScheduleTK3(rk);
#endif
    RunEncryptionKeyScheduleTK2(rk);

    Encrypt(input, rk, SBOX, SBOX2);

    pskinny_ctrl->func_skinny_128_384_enc = skinny_128_384_enc12_12;
}
/*
 * Encrypts `input` in place after reloading only TK1 (from CNT) and the TK2
 * input (from T), then re-running the TK2 schedule.
 *
 * K is ignored: the TK2 schedule reads the TK3^AC table already stored in
 * the context, so this variant presumably relies on an earlier
 * skinny_128_384_enc123_12 call on the same context -- confirm with callers.
 *
 * The round-key buffer is an unsigned char array, so viewing it as 32-bit
 * words needs an explicit cast (as skinny_128_384_enc123_12 already does);
 * initialising a uint32_t * from unsigned char * without one is a constraint
 * violation that compilers diagnose.
 */
void skinny_128_384_enc12_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K)
{
    (void)K;
    uint32_t *pt = (uint32_t*)&pskinny_ctrl->roundKeys[0];

    /* TK1 <- CNT (second word stored as bytes 7, 4, 5, 6) */
    pt[0] = *(uint32_t*)(&CNT[0]);
    pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]);

    /* TK2 input <- T, same packing for each 8-byte half */
    pt[4] = *(uint32_t*)(&T[0]);
    pack_word(T[7], T[4], T[5], T[6], pt[5]);
    pt[6] = *(uint32_t*)(&T[8]);
    pack_word(T[15], T[12], T[13], T[14], pt[7]);

    RunEncryptionKeyScheduleTK2(pskinny_ctrl->roundKeys);
    Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2);
}
/*
 * Encrypts `input` in place after reloading only TK1 from CNT.
 *
 * T and K are ignored: Encrypt consumes the per-round TK2^TK3^AC table that
 * is already stored in the context, so this variant presumably relies on an
 * earlier enc123_12 / enc12_12 call on the same context -- confirm with
 * callers.
 *
 * The round-key buffer is an unsigned char array, so the uint32_t view needs
 * an explicit cast (matching skinny_128_384_enc123_12).  The redundant
 * `extern` on the definition is dropped; linkage is unchanged.
 */
void skinny_128_384_enc1_1 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K)
{
    (void)T;
    (void)K;
    uint32_t *pt = (uint32_t*)&pskinny_ctrl->roundKeys[0];

    /* TK1 <- CNT (second word stored as bytes 7, 4, 5, 6) */
    pt[0] = *(uint32_t*)(&CNT[0]);
    pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]);

    Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2);
}
/*
 * PERMUTATION_TK1()
 *
 * Expands the 8 TK1 bytes at roundKeys[0..7] into seven further 8-byte
 * groups at roundKeys[8..63].  Each group is a fixed rearrangement of the
 * original bytes; the comment above each group lists the source byte index
 * for destination bytes 7 down to 0.  Together with the original group this
 * gives 64 bytes of TK1 material, which Encrypt reads 8 bytes at a time.
 * Expects a local or parameter named `roundKeys` (unsigned char *).
 */
#define PERMUTATION_TK1() \
\
/* permutation */ \
{ \
unsigned char tmp0 = roundKeys[0]; \
unsigned char tmp1 = roundKeys[1]; \
unsigned char tmp2 = roundKeys[2]; \
unsigned char tmp3 = roundKeys[3]; \
unsigned char tmp4 = roundKeys[4]; \
unsigned char tmp5 = roundKeys[5]; \
unsigned char tmp6 = roundKeys[6]; \
unsigned char tmp7 = roundKeys[7]; \
\
unsigned char* dst = &roundKeys[8]; \
\
/* 5 7 2 3 6 0 4 1 */ \
*dst++ = tmp1; \
*dst++ = tmp4; \
*dst++ = tmp0; \
*dst++ = tmp6; \
*dst++ = tmp3; \
*dst++ = tmp2; \
*dst++ = tmp7; \
*dst++ = tmp5; \
\
/* 2 5 0 6 7 1 3 4 */ \
*dst++ = tmp4; \
*dst++ = tmp3; \
*dst++ = tmp1; \
*dst++ = tmp7; \
*dst++ = tmp6; \
*dst++ = tmp0; \
*dst++ = tmp5; \
*dst++ = tmp2; \
\
/* 0 2 1 7 5 4 6 3 */ \
*dst++ = tmp3; \
*dst++ = tmp6; \
*dst++ = tmp4; \
*dst++ = tmp5; \
*dst++ = tmp7; \
*dst++ = tmp1; \
*dst++ = tmp2; \
*dst++ = tmp0; \
\
/* 1 0 4 5 2 3 7 6 */ \
*dst++ = tmp6; \
*dst++ = tmp7; \
*dst++ = tmp3; \
*dst++ = tmp2; \
*dst++ = tmp5; \
*dst++ = tmp4; \
*dst++ = tmp0; \
*dst++ = tmp1; \
\
/* 4 1 3 2 0 6 5 7 */ \
*dst++ = tmp7; \
*dst++ = tmp5; \
*dst++ = tmp6; \
*dst++ = tmp0; \
*dst++ = tmp2; \
*dst++ = tmp3; \
*dst++ = tmp1; \
*dst++ = tmp4; \
\
/* 3 4 6 0 1 7 2 5 */ \
*dst++ = tmp5; \
*dst++ = tmp2; \
*dst++ = tmp7; \
*dst++ = tmp1; \
*dst++ = tmp0; \
*dst++ = tmp6; \
*dst++ = tmp4; \
*dst++ = tmp3; \
\
/* 6 3 7 1 4 5 0 2 */ \
*dst++ = tmp2; \
*dst++ = tmp0; \
*dst++ = tmp5; \
*dst++ = tmp4; \
*dst++ = tmp1; \
*dst++ = tmp7; \
*dst++ = tmp3; \
*dst++ = tmp6; \
}
/*
 * SBOX_0 / SBOX_8 / SBOX_16 / SBOX_24 (b0, b1, b2, b3)
 *
 * Substitute the four bytes of one state row through `sbox` and write them
 * back rotated by 0, 1, 2 or 3 byte positions respectively, so the row
 * rotation is done as part of the lookup.  SBOX_16 looks its first byte up
 * in `sbox2` instead, which also adds the c2 constant.
 * All four substitutions are read into t0..t3 before any byte is written
 * back, because the arguments are both sources and destinations.
 * Expect locals t0..t3 and `sbox` / `sbox2` in scope.
 */
#define SBOX_0(b0, b1, b2, b3) \
\
t0 = sbox[b0]; \
t1 = sbox[b1]; \
t2 = sbox[b2]; \
t3 = sbox[b3]; \
\
b0 = (uint8_t)t0; \
b1 = (uint8_t)t1; \
b2 = (uint8_t)t2; \
b3 = (uint8_t)t3;
#define SBOX_8(b0, b1, b2, b3) \
\
t0 = sbox[b0]; \
t1 = sbox[b1]; \
t2 = sbox[b2]; \
t3 = sbox[b3]; \
\
b0 = (uint8_t)t3; \
b1 = (uint8_t)t0; \
b2 = (uint8_t)t1; \
b3 = (uint8_t)t2;
#define SBOX_16(b0, b1, b2, b3) \
\
t0 = sbox2[b0]; /* AC(c2) */ \
t1 = sbox[b1]; \
t2 = sbox[b2]; \
t3 = sbox[b3]; \
\
b0 = (uint8_t)t2; \
b1 = (uint8_t)t3; \
b2 = (uint8_t)t0; \
b3 = (uint8_t)t1;
#define SBOX_24(b0, b1, b2, b3) \
\
t0 = sbox[b0]; \
t1 = sbox[b1]; \
t2 = sbox[b2]; \
t3 = sbox[b3]; \
\
b0 = (uint8_t)t1; \
b1 = (uint8_t)t2; \
b2 = (uint8_t)t3; \
b3 = (uint8_t)t0;
#ifdef ___ENABLE_DWORD_CAST
/*
 * SKINNY_MAIN() -- 64-bit variant.
 *
 * Performs two consecutive rounds (one odd, one even) on the 16-byte
 * `block` in place.  Each round does the four SBOX_* row lookups, XORs
 * round-key material into the first 8 bytes of the state, then mixes the
 * rows and writes them back.
 *   odd round : XORs in one word from tk1 and one from tk2
 *   even round: XORs in one word from tk2 only
 * Each invocation therefore advances tk1 by one and tk2 by two uint64_t.
 * Only the low 32 bits of t0..t3 are stored back as the new rows.
 * Expects locals block, sbox, sbox2, tk1, tk2 (uint64_t *) and t0..t3
 * (uint64_t).
 */
#define SKINNY_MAIN() \
{ \
\
/* odd */ \
\
/* LUT(with ShiftRows & AC(c2))*/ \
\
SBOX_0( block[0], block[1], block[2], block[3]); \
SBOX_8( block[4], block[5], block[6], block[7]); \
SBOX_16(block[8], block[9], block[10], block[11]); \
SBOX_24(block[12], block[13], block[14], block[15]); \
\
/* TK1^TK2^TK3^AC(c0 c1) */ \
\
t1 = *(uint64_t*)&block[0]; \
t1 ^= *tk1++; \
t1 ^= *tk2++; \
\
/* MC */ \
\
t2 = *(uint64_t*)&block[8]; \
t0 = t2 >> 32; \
\
/* 0^2 */ \
t3 = t1 ^ t2; \
\
/* 1^2 */ \
t2 = (t1 >> 32) ^ t2; \
\
/* 0^2^3 */ \
t0 = t0 ^ t3; \
\
*(uint32_t*)&block[0] = (uint32_t)t0; \
*(uint32_t*)&block[4] = (uint32_t)t1; \
*(uint32_t*)&block[8] = (uint32_t)t2; \
*(uint32_t*)&block[12] = (uint32_t)t3; \
\
/* even */ \
\
/* LUT(with ShiftRows & AC(c2))*/ \
\
SBOX_0( block[0], block[1], block[2], block[3]); \
SBOX_8( block[4], block[5], block[6], block[7]); \
SBOX_16(block[8], block[9], block[10], block[11]); \
SBOX_24(block[12], block[13], block[14], block[15]); \
\
/* TK2^TK3^AC(c0 c1) */ \
\
t1 = *(uint64_t*)&block[0]; \
t1 ^= *tk2++; \
\
/* MC */ \
\
t2 = *(uint64_t*)&block[8]; \
t0 = t2 >> 32; \
\
/* 0^2 */ \
t3 = t1 ^ t2; \
\
/* 1^2 */ \
t2 = (t1 >> 32) ^ t2; \
\
/* 0^2^3 */ \
t0 = t0 ^ t3; \
\
*(uint32_t*)&block[0] = (uint32_t)t0; \
*(uint32_t*)&block[4] = (uint32_t)t1; \
*(uint32_t*)&block[8] = (uint32_t)t2; \
*(uint32_t*)&block[12] = (uint32_t)t3; \
}
#ifndef ___SKINNY_LOOP
/*
 * SKINNY-128-384 encryption of one 16-byte block, in place
 * (fully unrolled, 64-bit variant).
 *
 * block     : 16-byte state, overwritten with the result
 * roundKeys : context buffer; bytes 0..7 hold TK1 on entry, bytes 8..63 are
 *             filled here by PERMUTATION_TK1, and the per-round words from
 *             offset 64 onwards must already have been scheduled
 * sbox/sbox2: substitution tables used by the SBOX_* macros
 *
 * Each SKINNY_MAIN() is two rounds.  The 64-byte TK1 area lasts for eight
 * SKINNY_MAIN() calls, so tk1 is rewound after every eighth call while tk2
 * keeps advancing.
 */
void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2)
{
uint64_t *tk1;
uint64_t *tk2;
uint64_t t0; // used in MACRO
uint64_t t1; // used in MACRO
uint64_t t2; // used in MACRO
uint64_t t3; // used in MACRO
// TK1
PERMUTATION_TK1();
// SB+AC+ShR+MC
tk2 = (uint64_t*)&roundKeys[64];
tk1 = (uint64_t*)&roundKeys[0];
// 1st, ...,16th round
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
tk1 = (uint64_t*)&roundKeys[0];
// 17th, ...,32nd round
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
tk1 = (uint64_t*)&roundKeys[0];
// 33rd, ...,40th round
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
#ifdef ___NUM_OF_ROUNDS_56
// 41st, ...,48th round (tk1 continues through the second half of the TK1 area)
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
tk1 = (uint64_t*)&roundKeys[0];
// 49th, ... ,56th round
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
#endif
}
#else /* ___SKINNY_LOOP */
/*
 * SKINNY-128-384 encryption of one 16-byte block, in place
 * (loop form, 64-bit variant).
 *
 * block     : 16-byte state, overwritten with the result
 * roundKeys : context buffer; bytes 0..7 hold TK1 on entry, bytes 8..63 are
 *             filled here by PERMUTATION_TK1, and the per-round words from
 *             offset 64 onwards must already have been scheduled
 * sbox/sbox2: substitution tables used by the SBOX_* macros
 */
void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2)
{
#ifndef ___NUM_OF_ROUNDS_56
    enum { ROUND_PAIRS = 20 };   /* 40 rounds */
#else
    enum { ROUND_PAIRS = 28 };   /* 56 rounds */
#endif
    /* tk1, tk2 and t0..t3 are referenced by name inside SKINNY_MAIN. */
    uint64_t *tk1 = (uint64_t*)&roundKeys[0];
    uint64_t *tk2 = (uint64_t*)&roundKeys[64];
    uint64_t t0;
    uint64_t t1;
    uint64_t t2;
    uint64_t t3;
    int pair;

    /* Expand TK1 into its permuted copies. */
    PERMUTATION_TK1();

    /* SB+AC+ShR+MC, two rounds per SKINNY_MAIN(). */
    for (pair = 0; pair < ROUND_PAIRS; pair++) {
        /* The TK1 area lasts eight round pairs; rewind when it is used up. */
        if ((pair & 7) == 0) {
            tk1 = (uint64_t*)&roundKeys[0];
        }
        SKINNY_MAIN();
    }
}
#endif /* ___SKINNY_LOOP */
#else /* ___ENABLE_DWORD_CAST */
/*
 * SKINNY_MAIN() -- 32-bit variant.
 *
 * Performs two consecutive rounds (one odd, one even) on the 16-byte
 * `block` in place.  Each round does the four SBOX_* row lookups, XORs
 * round-key material into rows 0 and 1, then mixes the rows:
 *   new row 0 = row0 ^ row2 ^ row3,  new row 1 = row0,
 *   new row 2 = row1 ^ row2,         new row 3 = row0 ^ row2
 * (rows as they are after the lookups and the key XOR).
 *   odd round : XORs in two words from tk1 and two from tk2
 *   even round: XORs in two words from tk2 only
 * Each invocation therefore advances tk1 by two and tk2 by four uint32_t.
 * Expects locals block, sbox, sbox2, tk1, tk2 (uint32_t *) and t0..t4
 * (uint32_t).
 */
#define SKINNY_MAIN() \
{ \
\
/* odd */ \
\
/* LUT(with ShiftRows & AC(c2))*/ \
\
SBOX_0( block[0], block[1], block[2], block[3]); \
SBOX_8( block[4], block[5], block[6], block[7]); \
SBOX_16(block[8], block[9], block[10], block[11]); \
SBOX_24(block[12], block[13], block[14], block[15]); \
\
/* TK1^TK2^TK3^AC(c0 c1) */ \
\
t1 = *(uint32_t*)&block[0]; \
t0 = *(uint32_t*)&block[4]; \
t1 ^= *tk1++; \
t1 ^= *tk2++; \
t0 ^= *tk1++; \
t0 ^= *tk2++; \
\
/* MC */ \
\
t2 = *(uint32_t*)&block[8]; \
t4 = *(uint32_t*)&block[12]; \
\
/* 0^2 */ \
t3 = t1 ^ t2; \
\
/* 1^2 */ \
t2 = t0 ^ t2; \
\
/* 0^2^3 */ \
t0 = t3 ^ t4; \
\
*(uint32_t*)&block[0] = t0; \
*(uint32_t*)&block[4] = t1; \
*(uint32_t*)&block[8] = t2; \
*(uint32_t*)&block[12] = t3; \
\
/* even */ \
\
/* LUT(with ShiftRows & AC(c2))*/ \
\
SBOX_0( block[0], block[1], block[2], block[3]); \
SBOX_8( block[4], block[5], block[6], block[7]); \
SBOX_16(block[8], block[9], block[10], block[11]); \
SBOX_24(block[12], block[13], block[14], block[15]); \
\
/* TK2^TK3^AC(c0 c1) */ \
\
t1 = *(uint32_t*)&block[0]; \
t0 = *(uint32_t*)&block[4]; \
t1 ^= *tk2++; \
t0 ^= *tk2++; \
\
/* MC */ \
\
t2 = *(uint32_t*)&block[8]; \
t4 = *(uint32_t*)&block[12]; \
\
/* 0^2 */ \
t3 = t1 ^ t2; \
\
/* 1^2 */ \
t2 = t0 ^ t2; \
\
/* 0^2^3 */ \
t0 = t3 ^ t4; \
\
*(uint32_t*)&block[0] = t0; \
*(uint32_t*)&block[4] = t1; \
*(uint32_t*)&block[8] = t2; \
*(uint32_t*)&block[12] = t3; \
}
#ifndef ___SKINNY_LOOP
/*
 * Encrypt the 16-byte state `block` in place (fully unrolled, 32-bit words).
 *
 * block     : 16-byte cipher state, read and written by SKINNY_MAIN().
 * roundKeys : round-key buffer; the TK1 words are read from offset 0 and the
 *             combined TK2/TK3 words from offset 64.
 * sbox,
 * sbox2     : not referenced directly in this body; presumably consumed by
 *             the SBOX_* / PERMUTATION_TK1 macros -- TODO confirm.
 *
 * Every SKINNY_MAIN() performs two rounds. tk1 is rewound to roundKeys[0]
 * after every 16 rounds while tk2 keeps advancing through the buffer.
 * 40 rounds are run by default, 56 when ___NUM_OF_ROUNDS_56 is defined.
 */
void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2)
{
uint32_t *tk1;
uint32_t *tk2;
uint32_t t0; // used in MACRO
uint32_t t1; // used in MACRO
uint32_t t2; // used in MACRO
uint32_t t3; // used in MACRO
uint32_t t4; // used in MACRO
// TK1
PERMUTATION_TK1();
// SB+AC+ShR+MC
tk2 = (uint32_t*)&roundKeys[64];
tk1 = (uint32_t*)&roundKeys[0];
// 1st, ..., 16th round
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
// rewind TK1 words for the next 16 rounds
tk1 = (uint32_t*)&roundKeys[0];
// 17th, ..., 32nd round
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
tk1 = (uint32_t*)&roundKeys[0];
// 33rd, ..., 40th round
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
#ifdef ___NUM_OF_ROUNDS_56
// 41st, ..., 48th round (tk1 continues from the previous group)
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
tk1 = (uint32_t*)&roundKeys[0];
// 49th, ..., 56th round
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
#endif
}
#else /* ___SKINNY_LOOP */
/*
 * Encrypt the 16-byte state `block` in place (looped variant, 32-bit words).
 *
 * block     : 16-byte cipher state, read and written by SKINNY_MAIN().
 * roundKeys : round-key buffer; the TK1 words are read from offset 0 and the
 *             combined TK2/TK3 words from offset 64.
 * sbox,
 * sbox2     : not referenced directly in this body; presumably consumed by
 *             the SBOX_* / PERMUTATION_TK1 macros -- TODO confirm.
 *
 * Every SKINNY_MAIN() performs two rounds, so each pass of the outer loop
 * covers 16 rounds; tk1 is rewound to roundKeys[0] at the start of each pass
 * while tk2 keeps advancing. A final group of 4 invocations adds 8 rounds,
 * giving 40 rounds (or 56 with ___NUM_OF_ROUNDS_56).
 */
void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2)
{
uint32_t *tk1;
uint32_t *tk2;
uint32_t t0; // used in MACRO
uint32_t t1; // used in MACRO
uint32_t t2; // used in MACRO
uint32_t t3; // used in MACRO
uint32_t t4; // used in MACRO
// TK1
PERMUTATION_TK1();
// SB+AC+ShR+MC
tk2 = (uint32_t*)&roundKeys[64];
// 1st, ..., 32nd (or 48th) round
#ifndef ___NUM_OF_ROUNDS_56
for(int j=0;j<2;j++)
#else
for(int j=0;j<3;j++)
#endif
{
tk1 = (uint32_t*)&roundKeys[0];
for(int i=0;i<8;i++)
{
SKINNY_MAIN();
}
}
// 33rd, ..., 40th round (or 49th, ..., 56th round)
{
tk1 = (uint32_t*)&roundKeys[0];
for(int i=0;i<4;i++)
{
SKINNY_MAIN();
}
}
}
#endif /* ___SKINNY_LOOP */
#endif /* ___ENABLE_DWORD_CAST */
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
/*
* Date: 29 November 2018
* Contact: Thomas Peyrin - thomas.peyrin@gmail.com
* Mustafa Khairallah - mustafam001@e.ntu.edu.sg
*/
#include "crypto_aead.h"
#include "api.h"
#include "skinny.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Build a 16-byte padded block from the first len8 bytes of m.
 *
 *   len8 <= 0      : mp is all zero.
 *   0 < len8 < 16  : mp[0..len8-1] = m[0..len8-1], the remaining bytes are
 *                    zero and mp[15] holds len8.
 *   len8 >= 16     : mp is a plain copy of m[0..15].
 *
 * The copy is done byte by byte on purpose: only the len8 valid bytes of m
 * are ever read (the previous word-wise version loaded whole 32/64-bit words
 * and could read past the end of a short input), and neither pointer needs
 * any particular alignment or byte order.
 */
void pad (const unsigned char* m, unsigned char* mp, int len8) {
  int i;

  if (len8 >= 16) {
    for (i = 0; i < 16; i++) {
      mp[i] = m[i];
    }
    return;
  }

  if (len8 < 0) {
    len8 = 0;
  }

  for (i = 0; i < len8; i++) {
    mp[i] = m[i];
  }
  for (; i < 16; i++) {
    mp[i] = 0;
  }

  /* The last byte of a partial block records how many bytes are valid. */
  if (len8 > 0) {
    mp[15] = (unsigned char)len8;
  }
}
/*
 * Apply the byte-wise feedback map to the 16-byte state s and store the
 * result in c. Each output byte is
 *     (x >> 1) with its top bit replaced by (bit7(x) ^ bit0(x)),
 * computed here on whole machine words with per-byte masks.
 *
 * All of s is loaded before anything is written to c. Both pointers are
 * accessed through word casts.
 */
void g8A (unsigned char* s, unsigned char* c) {
#ifdef ___ENABLE_DWORD_CAST
  uint64_t in[2];
  int w;

  in[0] = *(uint64_t*)(&s[0]);
  in[1] = *(uint64_t*)(&s[8]);
  for (w = 0; w < 2; w++) {
    const uint64_t shifted = (in[w] >> 1) & 0x7f7f7f7f7f7f7f7f;
    const uint64_t topbits = (in[w] ^ (in[w] << 7)) & 0x8080808080808080;
    *(uint64_t*)(&c[8 * w]) = shifted ^ topbits;
  }
#else
  uint32_t in[4];
  int w;

  for (w = 0; w < 4; w++) {
    in[w] = *(uint32_t*)(&s[4 * w]);
  }
  for (w = 0; w < 4; w++) {
    const uint32_t shifted = (in[w] >> 1) & 0x7f7f7f7f;
    const uint32_t topbits = (in[w] ^ (in[w] << 7)) & 0x80808080;
    *(uint32_t*)(&c[4 * w]) = shifted ^ topbits;
  }
#endif
}
/*
 * Same byte-wise map as g8A(), but the result is written to c one byte at a
 * time because the tag destination is not guaranteed to be word aligned.
 * The state s is still read through word casts, and is read completely
 * before the first byte of c is written.
 */
void g8A_for_Tag_Generation (unsigned char* s, unsigned char* c) {
#ifdef ___ENABLE_DWORD_CAST
  uint64_t res[2];
  int i;

  for (i = 0; i < 2; i++) {
    const uint64_t v = *(uint64_t*)(&s[8 * i]);
    res[i] = ((v >> 1) & 0x7f7f7f7f7f7f7f7f) ^ ((v ^ (v << 7)) & 0x8080808080808080);
  }
  /* Emit least-significant byte first, eight bytes per word. */
  for (i = 0; i < 16; i++) {
    c[i] = (unsigned char)((res[i >> 3] >> (8 * (i & 7))) & 0xFF);
  }
#else
  uint32_t res[4];
  int i;

  for (i = 0; i < 4; i++) {
    const uint32_t v = *(uint32_t*)(&s[4 * i]);
    res[i] = ((v >> 1) & 0x7f7f7f7f) ^ ((v ^ (v << 7)) & 0x80808080);
  }
  /* Emit least-significant byte first, four bytes per word. */
  for (i = 0; i < 16; i++) {
    c[i] = (unsigned char)((res[i >> 2] >> (8 * (i & 3))) & 0xFF);
  }
#endif
}
/*
 * Absorb one full 16-byte block into the state: s ^= m, word by word.
 */
void rho_ad_eqov16 (
    const unsigned char* m,
    unsigned char* s) {
  int off;
#ifdef ___ENABLE_DWORD_CAST
  for (off = 0; off < 16; off += 8) {
    *(uint64_t*)(&s[off]) ^= *(uint64_t*)(&m[off]);
  }
#else
  for (off = 0; off < 16; off += 4) {
    *(uint32_t*)(&s[off]) ^= *(uint32_t*)(&m[off]);
  }
#endif
}
/*
 * Absorb a partial block (len8 bytes of m) into the state:
 * the input is expanded to 16 bytes with pad() and XORed into s.
 */
void rho_ad_ud16 (
    const unsigned char* m,
    unsigned char* s,
    int len8) {
  unsigned char padded [16];
  int off;

  pad(m, padded, len8);
#ifdef ___ENABLE_DWORD_CAST
  for (off = 0; off < 16; off += 8) {
    *(uint64_t*)(&s[off]) ^= *(uint64_t*)(&padded[off]);
  }
#else
  for (off = 0; off < 16; off += 4) {
    *(uint32_t*)(&s[off]) ^= *(uint32_t*)(&padded[off]);
  }
#endif
}
/*
 * Process one full 16-byte message block during encryption.
 *
 *   c = g8A(s) ^ m
 *   s = s ^ m
 *
 * g8A() first deposits its output in c; c, s and m are then all loaded
 * before s and finally c are written back.
 */
void rho_eqov16 (
    const unsigned char* m,
    unsigned char* c,
    unsigned char* s) {
#ifdef ___ENABLE_DWORD_CAST
  uint64_t ct[2];
  uint64_t st[2];
  uint64_t msg[2];
  int w;

  g8A(s,c);
  for (w = 0; w < 2; w++) {
    ct[w] = *(uint64_t*)(&c[8 * w]);
  }
  for (w = 0; w < 2; w++) {
    st[w] = *(uint64_t*)(&s[8 * w]);
  }
  for (w = 0; w < 2; w++) {
    msg[w] = *(uint64_t*)(&m[8 * w]);
  }
  for (w = 0; w < 2; w++) {
    *(uint64_t*)(&s[8 * w]) = st[w] ^ msg[w];
  }
  for (w = 0; w < 2; w++) {
    *(uint64_t*)(&c[8 * w]) = ct[w] ^ msg[w];
  }
#else
  uint32_t ct[4];
  uint32_t st[4];
  uint32_t msg[4];
  int w;

  g8A(s,c);
  for (w = 0; w < 4; w++) {
    ct[w] = *(uint32_t*)(&c[4 * w]);
  }
  for (w = 0; w < 4; w++) {
    st[w] = *(uint32_t*)(&s[4 * w]);
  }
  for (w = 0; w < 4; w++) {
    msg[w] = *(uint32_t*)(&m[4 * w]);
  }
  for (w = 0; w < 4; w++) {
    *(uint32_t*)(&s[4 * w]) = st[w] ^ msg[w];
  }
  for (w = 0; w < 4; w++) {
    *(uint32_t*)(&c[4 * w]) = ct[w] ^ msg[w];
  }
#endif
}
/*
 * Process the final, partial message block during encryption.
 *
 * m    : plaintext, only the first len8 bytes are read.
 * c    : ciphertext output, only the first len8 bytes are written.
 * s    : 16-byte state, updated in place.
 * len8 : number of valid bytes (values outside 0..16 are clamped).
 *
 * With G(x) = (x >> 1) with the top bit replaced by bit7(x) ^ bit0(x),
 * and mp = m zero-padded to 16 bytes with mp[15] = len8 for a partial block:
 *
 *   c[i] = G(s[i]) ^ m[i]   for i <  len8
 *   s[i] = s[i] ^ mp[i]     for all 16 bytes
 *
 * Everything is done byte by byte: no bytes beyond m[len8-1] are read, the
 * caller's pointers need no alignment, and the result does not depend on the
 * host byte order. The earlier version also zeroed c[len8..15]; those bytes
 * belong to the tag area that the caller overwrites, so they are now left
 * untouched.
 */
void rho_ud16 (
    const unsigned char* m,
    unsigned char* c,
    unsigned char* s,
    int len8) {
  int n = len8;
  int i;

  if (n < 0) {
    n = 0;
  } else if (n > 16) {
    n = 16;
  }

  for (i = 0; i < 16; i++) {
    const unsigned char si = s[i];
    const unsigned char gi =
        (unsigned char)((si >> 1) ^ ((si ^ (si << 7)) & 0x80));
    unsigned char mpi = (i < n) ? m[i] : 0;

    /* Length marker in the last byte of a partial block. */
    if (i == 15 && n > 0 && n < 16) {
      mpi = (unsigned char)n;
    }

    if (i < n) {
      c[i] = (unsigned char)(gi ^ mpi);
    }
    s[i] = (unsigned char)(si ^ mpi);
  }
}
/*
 * Process one full 16-byte ciphertext block during decryption.
 *
 *   m = g8A(s) ^ c
 *   s = s ^ m            (computed as s ^ c ^ g8A(s))
 *
 * g8A() first deposits its output in m; c, s and m are then all loaded
 * before s and finally m are written back.
 */
void irho_eqov16 (
    unsigned char* m,
    const unsigned char* c,
    unsigned char* s) {
#ifdef ___ENABLE_DWORD_CAST
  uint64_t ct[2];
  uint64_t st[2];
  uint64_t gs[2];
  int w;

  g8A(s,m);
  for (w = 0; w < 2; w++) {
    ct[w] = *(uint64_t*)(&c[8 * w]);
  }
  for (w = 0; w < 2; w++) {
    st[w] = *(uint64_t*)(&s[8 * w]);
  }
  for (w = 0; w < 2; w++) {
    gs[w] = *(uint64_t*)(&m[8 * w]);
  }
  for (w = 0; w < 2; w++) {
    *(uint64_t*)(&s[8 * w]) = st[w] ^ (ct[w] ^ gs[w]);
  }
  for (w = 0; w < 2; w++) {
    *(uint64_t*)(&m[8 * w]) = gs[w] ^ ct[w];
  }
#else
  uint32_t ct[4];
  uint32_t st[4];
  uint32_t gs[4];
  int w;

  g8A(s,m);
  for (w = 0; w < 4; w++) {
    ct[w] = *(uint32_t*)(&c[4 * w]);
  }
  for (w = 0; w < 4; w++) {
    st[w] = *(uint32_t*)(&s[4 * w]);
  }
  for (w = 0; w < 4; w++) {
    gs[w] = *(uint32_t*)(&m[4 * w]);
  }
  for (w = 0; w < 4; w++) {
    *(uint32_t*)(&s[4 * w]) = st[w] ^ (ct[w] ^ gs[w]);
  }
  for (w = 0; w < 4; w++) {
    *(uint32_t*)(&m[4 * w]) = gs[w] ^ ct[w];
  }
#endif
}
/*
 * Process the final, partial ciphertext block during decryption.
 *
 * m    : plaintext output, only the first len8 bytes are written.
 * c    : ciphertext, only the first len8 bytes are read.
 * s    : 16-byte state, updated in place.
 * len8 : number of valid bytes (values outside 0..16 are clamped).
 *
 * With G(x) = (x >> 1) with the top bit replaced by bit7(x) ^ bit0(x),
 * and cp = c zero-padded to 16 bytes with cp[15] = len8 for a partial block:
 *
 *   m[i] = G(s[i]) ^ c[i]              for i <  len8
 *   s[i] = s[i] ^ G(s[i]) ^ cp[i]      for i <  len8
 *   s[i] = s[i] ^ cp[i]                for i >= len8
 *
 * The previous implementation ran g8A() straight into m and stored four
 * (or two) full words back, i.e. it always wrote 16 bytes even though only
 * len8 bytes of plaintext remain in the caller's buffer -- an overflow of
 * 16 - len8 bytes on the last block. This version touches exactly len8
 * output bytes, works byte by byte (no alignment or byte-order assumptions)
 * and is self-contained.
 */
void irho_ud16 (
    unsigned char* m,
    const unsigned char* c,
    unsigned char* s,
    int len8) {
  int n = len8;
  int i;

  if (n < 0) {
    n = 0;
  } else if (n > 16) {
    n = 16;
  }

  for (i = 0; i < 16; i++) {
    const unsigned char si = s[i];
    const unsigned char gi =
        (unsigned char)((si >> 1) ^ ((si ^ (si << 7)) & 0x80));
    unsigned char cpi = (i < n) ? c[i] : 0;

    /* Length marker in the last byte of a partial block. */
    if (i == 15 && n > 0 && n < 16) {
      cpi = (unsigned char)n;
    }

    if (i < n) {
      m[i] = (unsigned char)(gi ^ cpi);
      s[i] = (unsigned char)(si ^ gi ^ cpi);
    } else {
      s[i] = (unsigned char)(si ^ cpi);
    }
  }
}
/*
 * Reset the 8-byte counter buffer: CNT read as a little-endian integer
 * becomes 1 (all eight bytes are written).
 */
void reset_lfsr_gf56 (unsigned char* CNT) {
#ifdef ___ENABLE_DWORD_CAST
  uint64_t* const word = (uint64_t*)(&CNT[0]);

  word[0] = 0x0000000000000001; /* CNT7 .. CNT0 */
#else
  uint32_t* const word = (uint32_t*)(&CNT[0]);

  word[0] = 0x00000001; /* CNT3 CNT2 CNT1 CNT0 */
  word[1] = 0x00000000; /* CNT7 CNT6 CNT5 CNT4 */
#endif
}
/*
 * Advance the counter by one step: the 8-byte buffer is shifted left by one
 * bit as a little-endian integer, and 0x95 is XORed into the low byte when
 * the top bit of CNT[6] was set before the shift.
 *
 * Note that the shift runs over all eight bytes, so CNT[7] is modified too.
 */
void lfsr_gf56 (unsigned char* CNT) {
#ifdef ___ENABLE_DWORD_CAST
  uint64_t* const word = (uint64_t*)(&CNT[0]);
  const uint64_t cur = word[0]; /* CNT7 .. CNT0 */
  const uint64_t feedback = (CNT[6] & 0x80) ? 0x95 : 0;

  word[0] = (cur << 1) ^ feedback;
#else
  uint32_t* const word = (uint32_t*)(&CNT[0]);
  const uint32_t low = word[0];  /* CNT3 CNT2 CNT1 CNT0 */
  const uint32_t high = word[1]; /* CNT7 CNT6 CNT5 CNT4 */
  const uint32_t feedback = (CNT[6] & 0x80) ? 0x95 : 0;

  /* carry the top bit of the low word into the high word */
  word[0] = (low << 1) ^ feedback;
  word[1] = (high << 1) | (low >> 31);
#endif
}
/*
 * Run the cipher selected in p_skinny_ctrl on the state s.
 * The domain byte D is stored in CNT[7] first; CNT, T and k are then handed
 * to the selected encryption routine together with the control block.
 */
void block_cipher(
    unsigned char* s,
    const unsigned char* k, unsigned char* T,
    unsigned char* CNT, unsigned char D,
    skinny_ctrl* p_skinny_ctrl) {
  *(CNT + 7) = D;
  (*p_skinny_ctrl).func_skinny_128_384_enc(s, p_skinny_ctrl, CNT, T, k);
}
/*
 * Encrypt the state s with the nonce N in the T slot of block_cipher().
 * N is only passed through; the const qualifier is dropped to match the
 * callee's parameter type.
 */
void nonce_encryption (
    const unsigned char* N,
    unsigned char* CNT,
    unsigned char*s, const unsigned char* k,
    unsigned char D,
    skinny_ctrl* p_skinny_ctrl) {
  unsigned char* const tweak = (unsigned char*)N;

  block_cipher(s, k, tweak, CNT, D, p_skinny_ctrl);
}
/*
 * Write the 16-byte tag derived from s at the current output position *c,
 * then move *c forward by 16 and back by *clen.
 */
void generate_tag (
    unsigned char** c, unsigned char* s,
    unsigned long long* clen) {
  unsigned char* pos = *c;

  g8A_for_Tag_Generation(s, pos);
  pos = pos + 16;
  pos = pos - *clen;
  *c = pos;
}
/*
 * Encrypt one full 16-byte message block.
 * Advances *M and *c by 16, steps the counter, re-encrypts the state under
 * domain byte D, and returns the number of message bytes still to process.
 */
unsigned long long msg_encryption_eqov16 (
    const unsigned char** M, unsigned char** c,
    const unsigned char* N,
    unsigned char* CNT,
    unsigned char*s, const unsigned char* k,
    unsigned char D,
    unsigned long long mlen,
    skinny_ctrl* p_skinny_ctrl) {
  const unsigned long long remaining = mlen - 16;

  rho_eqov16(*M, *c, s);
  *c += 16;
  *M += 16;

  lfsr_gf56(CNT);
  nonce_encryption(N, CNT, s, k, D, p_skinny_ctrl);

  return remaining;
}
/*
 * Encrypt the final message block of fewer than 16 bytes.
 * Advances *M and *c by mlen, steps the counter, re-encrypts the state under
 * domain byte D, and returns 0 (nothing left to process).
 */
unsigned long long msg_encryption_ud16 (
    const unsigned char** M, unsigned char** c,
    const unsigned char* N,
    unsigned char* CNT,
    unsigned char*s, const unsigned char* k,
    unsigned char D,
    unsigned long long mlen,
    skinny_ctrl* p_skinny_ctrl) {
  rho_ud16(*M, *c, s, (int)mlen);
  *c += mlen;
  *M += mlen;

  lfsr_gf56(CNT);
  nonce_encryption(N, CNT, s, k, D, p_skinny_ctrl);

  return 0;
}
/*
 * Decrypt one full 16-byte ciphertext block.
 * Advances *M and *c by 16, steps the counter, re-encrypts the state under
 * domain byte D, and returns the number of ciphertext bytes still to process.
 */
unsigned long long msg_decryption_eqov16 (
    unsigned char** M, const unsigned char** c,
    const unsigned char* N,
    unsigned char* CNT,
    unsigned char*s, const unsigned char* k,
    unsigned char D,
    unsigned long long clen,
    skinny_ctrl* p_skinny_ctrl) {
  const unsigned long long remaining = clen - 16;

  irho_eqov16(*M, *c, s);
  *c += 16;
  *M += 16;

  lfsr_gf56(CNT);
  nonce_encryption(N, CNT, s, k, D, p_skinny_ctrl);

  return remaining;
}
/*
 * Decrypt the final ciphertext block of fewer than 16 bytes.
 * Advances *M and *c by clen, steps the counter, re-encrypts the state under
 * domain byte D, and returns 0 (nothing left to process).
 */
unsigned long long msg_decryption_ud16 (
    unsigned char** M, const unsigned char** c,
    const unsigned char* N,
    unsigned char* CNT,
    unsigned char*s, const unsigned char* k,
    unsigned char D,
    unsigned long long clen,
    skinny_ctrl* p_skinny_ctrl) {
  irho_ud16(*M, *c, s, (int)clen);
  *c += clen;
  *M += clen;

  lfsr_gf56(CNT);
  nonce_encryption(N, CNT, s, k, D, p_skinny_ctrl);

  return 0;
}
/*
 * Process a pair of full 16-byte associated-data blocks.
 * The first block is XORed into the state; the second is copied into a local
 * buffer and passed to block_cipher() in the T slot under domain byte D.
 * The counter is stepped after each block, *A is advanced by 32, and the
 * number of AD bytes still to process is returned.
 */
unsigned long long ad_encryption_eqov32 (
    const unsigned char** A, unsigned char* s,
    const unsigned char* k, unsigned long long adlen,
    unsigned char* CNT,
    unsigned char D,
    skinny_ctrl* p_skinny_ctrl) {
  unsigned char T [16];
  const unsigned char* second;
  int off;

  rho_ad_eqov16(*A, s);
  *A += 16;
  lfsr_gf56(CNT);

  second = *A;
#ifdef ___ENABLE_DWORD_CAST
  for (off = 0; off < 16; off += 8) {
    *(uint64_t*)(&T[off]) = *(uint64_t*)(&second[off]);
  }
#else
  for (off = 0; off < 16; off += 4) {
    *(uint32_t*)(&T[off]) = *(uint32_t*)(&second[off]);
  }
#endif
  *A += 16;

  block_cipher(s, k, T, CNT, D, p_skinny_ctrl);
  lfsr_gf56(CNT);

  return adlen - 32;
}
/*
 * Process the last 17..31 bytes of associated data: one full block followed
 * by a partial one. The full block is XORed into the state; the remaining
 * adlen - 16 bytes are padded into a local buffer and passed to
 * block_cipher() in the T slot under domain byte D. The counter is stepped
 * after each block, *A is advanced past all consumed bytes, and 0 is
 * returned.
 */
unsigned long long ad_encryption_ov16 (
    const unsigned char** A, unsigned char* s,
    const unsigned char* k, unsigned long long adlen,
    unsigned char* CNT,
    unsigned char D,
    skinny_ctrl* p_skinny_ctrl) {
  unsigned char T [16];
  const unsigned long long tail = adlen - 16;

  rho_ad_eqov16(*A, s);
  *A += 16;
  lfsr_gf56(CNT);

  pad(*A, T, (int)tail);
  *A += tail;

  block_cipher(s, k, T, CNT, D, p_skinny_ctrl);
  lfsr_gf56(CNT);

  return 0;
}
/*
 * Process a final associated-data block of exactly 16 bytes:
 * XOR it into the state, advance *A by 16, step the counter, return 0.
 */
unsigned long long ad_encryption_eq16 (
    const unsigned char** A, unsigned char* s,
    unsigned char* CNT) {
  const unsigned char* block = *A;

  rho_ad_eqov16(block, s);
  *A = block + 16;
  lfsr_gf56(CNT);

  return 0;
}
/*
 * Process a final associated-data block of fewer than 16 bytes:
 * pad-and-XOR it into the state, advance *A by adlen, step the counter,
 * return 0.
 */
unsigned long long ad_encryption_ud16(
    const unsigned char** A, unsigned char* s,
    unsigned long long adlen,
    unsigned char* CNT) {
  const unsigned char* block = *A;

  rho_ad_ud16(block, s, (int)adlen);
  *A = block + adlen;
  lfsr_gf56(CNT);

  return 0;
}
/*
 * AEAD encryption.
 *
 * c, clen  : ciphertext output; *clen is set to mlen + 16 and c receives
 *            mlen ciphertext bytes followed by the 16-byte tag.
 * m, mlen  : plaintext.
 * ad, adlen: associated data.
 * nsec     : unused.
 * npub     : nonce, passed to the cipher in the T slot.
 * k        : key.
 *
 * Always returns 0.
 *
 * Phase 1 absorbs the associated data with skinny_128_384_enc123_12,
 * phase 2 encrypts the message with skinny_128_384_enc1_1; the counter is
 * reset at the start of each phase. The domain byte passed with each cipher
 * call depends on whether the block is the last one and whether it is full.
 */
int crypto_aead_encrypt (
    unsigned char* c, unsigned long long* clen,
    const unsigned char* m, unsigned long long mlen,
    const unsigned char* ad, unsigned long long adlen,
    const unsigned char* nsec,
    const unsigned char* npub,
    const unsigned char* k) {
  unsigned char s[16];
  unsigned char CNT[8];
  const unsigned char* A = ad;
  const unsigned char* M = m;
  const unsigned char* N = npub;
  skinny_ctrl ctrl;
  int i;

  (void) nsec;

  /* The state starts out all zero. */
  for (i = 0; i < 16; i++) {
    s[i] = 0;
  }

  /* ---- associated data ---- */
  ctrl.func_skinny_128_384_enc = skinny_128_384_enc123_12;
  reset_lfsr_gf56(CNT);

  if (adlen == 0) { /* empty AD */
    lfsr_gf56(CNT);
    nonce_encryption(N,CNT,s,k,0x1a,&ctrl);
  }
  while (adlen > 0) {
    if (adlen > 32) { /* ordinary pair of full blocks */
      adlen = ad_encryption_eqov32(&A,s,k,adlen,CNT,0x08,&ctrl);
    }
    else if (adlen == 32) { /* last block is even-numbered and full */
      adlen = ad_encryption_eqov32(&A,s,k,adlen,CNT,0x08,&ctrl);
      nonce_encryption(N,CNT,s,k,0x18,&ctrl);
    }
    else if (adlen > 16) { /* last block is even-numbered and partial */
      adlen = ad_encryption_ov16(&A,s,k,adlen,CNT,0x08,&ctrl);
      nonce_encryption(N,CNT,s,k,0x1a,&ctrl);
    }
    else if (adlen == 16) { /* last block is odd-numbered and full */
      adlen = ad_encryption_eq16(&A,s,CNT);
      nonce_encryption(N,CNT,s,k,0x18,&ctrl);
    }
    else { /* last block is odd-numbered and partial */
      adlen = ad_encryption_ud16(&A,s,adlen,CNT);
      nonce_encryption(N,CNT,s,k,0x1a,&ctrl);
    }
  }

  /* ---- message ---- */
  ctrl.func_skinny_128_384_enc = skinny_128_384_enc1_1;
  reset_lfsr_gf56(CNT);
  *clen = mlen + 16;

  if (mlen == 0) { /* empty message */
    lfsr_gf56(CNT);
    nonce_encryption(N,CNT,s,k,0x15,&ctrl);
  }
  while (mlen > 0) {
    if (mlen >= 16) {
      /* 0x14 marks a full final block, 0x04 any earlier full block */
      const unsigned char domain = (mlen == 16) ? 0x14 : 0x04;
      mlen = msg_encryption_eqov16(&M,&c,N,CNT,s,k,domain,mlen,&ctrl);
    }
    else { /* partial final block */
      mlen = msg_encryption_ud16(&M,&c,N,CNT,s,k,0x15,mlen,&ctrl);
    }
  }

  /* ---- tag ---- */
  generate_tag(&c,s,clen);
  return 0;
}
/*
 * AEAD decryption and tag verification.
 *
 * m, mlen  : plaintext output; *mlen is set to clen - 16.
 * nsec     : unused.
 * c, clen  : ciphertext followed by the 16-byte tag.
 * ad, adlen: associated data.
 * npub     : nonce, passed to the cipher in the T slot.
 * k        : key.
 *
 * Returns 0 when the tag matches and -1 otherwise. -1 is also returned,
 * before any output is written, when clen is too short to contain a tag:
 * without that check clen - 16 wraps around and the decryption loop would
 * run far beyond both buffers.
 *
 * The tag is compared without an early exit so the time taken does not
 * reveal the position of the first mismatching byte.
 *
 * NOTE(review): on a tag mismatch the (unauthenticated) plaintext is left in
 * m, as before; callers must not use it when -1 is returned.
 */
int crypto_aead_decrypt(
    unsigned char *m,unsigned long long *mlen,
    unsigned char *nsec,
    const unsigned char *c,unsigned long long clen,
    const unsigned char *ad,unsigned long long adlen,
    const unsigned char *npub,
    const unsigned char *k) {
  unsigned char s[16];
  unsigned char T[16];
  unsigned char CNT[8];
  const unsigned char* A = ad;
  unsigned char* M = m;
  const unsigned char* N = npub;
  unsigned char diff = 0;
  skinny_ctrl ctrl;

  (void) nsec;

  /* A valid input carries at least the 16-byte tag. */
  if (clen < 16) {
    return -1;
  }

  /* The state starts out all zero. */
  for (int i = 0; i < 16; i++) {
    s[i] = 0;
  }

  /* ---- associated data ---- */
  ctrl.func_skinny_128_384_enc = skinny_128_384_enc123_12;
  reset_lfsr_gf56(CNT);
  if (adlen == 0) { // AD is an empty string
    lfsr_gf56(CNT);
    nonce_encryption(N,CNT,s,k,0x1a,&ctrl);
  }
  else while (adlen > 0) {
    if (adlen < 16) { // The last block of AD is odd and incomplete
      adlen = ad_encryption_ud16(&A,s,adlen,CNT);
      nonce_encryption(N,CNT,s,k,0x1a,&ctrl);
    }
    else if (adlen == 16) { // The last block of AD is odd and complete
      adlen = ad_encryption_eq16(&A,s,CNT);
      nonce_encryption(N,CNT,s,k,0x18,&ctrl);
    }
    else if (adlen < 32) { // The last block of AD is even and incomplete
      adlen = ad_encryption_ov16(&A,s,k,adlen,CNT,0x08,&ctrl);
      nonce_encryption(N,CNT,s,k,0x1a,&ctrl);
    }
    else if (adlen == 32) { // The last block of AD is even and complete
      adlen = ad_encryption_eqov32(&A,s,k,adlen,CNT,0x08,&ctrl);
      nonce_encryption(N,CNT,s,k,0x18,&ctrl);
    }
    else { // A normal full pair of blocks of AD
      adlen = ad_encryption_eqov32(&A,s,k,adlen,CNT,0x08,&ctrl);
    }
  }

  /* ---- ciphertext ---- */
  ctrl.func_skinny_128_384_enc = skinny_128_384_enc1_1;
  reset_lfsr_gf56(CNT);
  clen = clen - 16;
  *mlen = clen;
  if (clen == 0) { // C is an empty string
    lfsr_gf56(CNT);
    nonce_encryption(N,CNT,s,k,0x15,&ctrl);
  }
  else while (clen > 0) {
    if (clen < 16) { // The last block of C is incomplete
      clen = msg_decryption_ud16(&M,&c,N,CNT,s,k,0x15,clen,&ctrl);
    }
    else if (clen == 16) { // The last block of C is complete
      clen = msg_decryption_eqov16(&M,&c,N,CNT,s,k,0x14,clen,&ctrl);
    }
    else { // A normal full message block
      clen = msg_decryption_eqov16(&M,&c,N,CNT,s,k,0x04,clen,&ctrl);
    }
  }

  /* ---- tag verification ---- */
  /* c now points at the received tag; accumulate all differences first. */
  g8A_for_Tag_Generation(s, T);
  for (int i = 0; i < 16; i++) {
    diff |= (unsigned char)(T[i] ^ c[i]);
  }
  return (diff != 0) ? -1 : 0;
}
#define ___SKINNY_LOOP
//#define ___NUM_OF_ROUNDS_56
#if (defined(__riscv_xlen) && (__riscv_xlen == 64))
#define ___ENABLE_DWORD_CAST
#endif
#include <stdint.h>
/*
 * Cipher control block handed to every SKINNY-128-384 encryption call.
 *
 * roundKeys               : working buffer for the expanded round keys;
 *                           704 bytes for 40 rounds, 960 bytes when
 *                           ___NUM_OF_ROUNDS_56 selects 56 rounds.
 * func_skinny_128_384_enc : encryption routine to call. It receives the
 *                           16-byte block, this control block, the counter
 *                           buffer CNT, the T buffer and the key K.
 */
typedef struct ___skinny_ctrl {
#ifdef ___NUM_OF_ROUNDS_56
unsigned char roundKeys[960]; // number of rounds : 56
#else
unsigned char roundKeys[704]; // number of rounds : 40
#endif
void (*func_skinny_128_384_enc)(unsigned char*, struct ___skinny_ctrl*, unsigned char* CNT, unsigned char* T, const unsigned char* K);
} skinny_ctrl;
/*
 * Encryption routines that can be stored in func_skinny_128_384_enc.
 * They are defined elsewhere; the numeric suffixes presumably name which
 * tweakey words are (re)scheduled by the call -- TODO confirm against the
 * implementation.
 */
extern void skinny_128_384_enc123_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K);
extern void skinny_128_384_enc12_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K);
extern void skinny_128_384_enc1_1 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K);
/*
 * pack_word(x0, x1, x2, x3, w): assemble the 32-bit value whose bytes, from
 * least to most significant, are x0, x1, x2, x3 and assign it to w.
 *
 * Each byte is widened to uint32_t before shifting. Shifting a promoted
 * (signed) int left by 24 overflows for byte values >= 0x80, which is
 * undefined behaviour and sign-extends when w is wider than 32 bits.
 *
 * The expansion is a complete statement (it ends in ';').
 */
#define pack_word(x0, x1, x2, x3, w) \
w = ((uint32_t)(x3) << 24) ^ \
((uint32_t)(x2) << 16) ^ \
((uint32_t)(x1) << 8) ^ \
(uint32_t)(x0);
/*
 * unpack_word(x0, x1, x2, x3, w): split w into its four bytes, least
 * significant first. The expansion is four complete statements.
 */
#define unpack_word(x0, x1, x2, x3, w) \
x0 = ((w) & 0xff); \
x1 = (((w) >> 8) & 0xff); \
x2 = (((w) >> 16) & 0xff); \
x3 = ((w) >> 24);
#ifdef ___ENABLE_DWORD_CAST
/*
 * PERMUTATION() (64-bit variant): permute the eight bytes of `dw` in place.
 *
 * Numbering the source bytes 7..0 from most to least significant, the result
 * holds bytes 5 7 2 3 6 0 4 1 in that order. The diagrams on each line show
 * which source byte occupies which position after the step.
 *
 * Requires `dw`, `dt0` and `dt1` (64-bit) in the caller's scope; dt0 and dt1
 * are clobbered.
 */
#define PERMUTATION() \
/* permutation */ \
\
/* 7 6 5 4 3 2 1 0 */ \
/* 5 7 2 3 6 0 4 1 */ \
\
/* dw (7 6 5 4 3 2 1 0) */ \
\
/* dw (5 7 2 3 6 0 4 1) */ \
\
dt0 = dw >> 24; /* - - - 7 6 5 4 3 */ \
dt0 = dt0 & 0x00000000ff00ff00; /* - - - - 6 - 4 - */ \
\
dt1 = dw << 16; /* 5 4 3 2 1 0 - - */ \
dt1 = dt1 & 0xff00000000ff0000; /* 5 - - - - 0 - - */ \
dt0 = dt0 ^ dt1; /* 5 - - - 6 0 4 - */ \
\
dt1 = dw >> 8; /* - 7 6 5 4 3 2 1 */ \
dt1 = dt1 & 0x00ff0000000000ff; /* - 7 - - - - - 1 */ \
dt0 = dt0 ^ dt1; /* 5 7 - - 6 0 4 1 */ \
\
dt1 = dw << 8; /* 6 5 4 3 2 1 0 - */ \
dt1 = dt1 & 0x000000ff00000000; /* - - - 3 - - - - */ \
dt0 = dt0 ^ dt1; /* 5 7 - 3 6 0 4 1 */ \
\
dt1 = dw << 24; /* 4 3 2 1 0 - - - */ \
dw = dt1 & 0x0000ff0000000000; /* - - 2 - - - - - */ \
dw = dw ^ dt0; /* 5 7 2 3 6 0 4 1 */
#else
/*
 * PERMUTATION() (32-bit variant): permute the eight bytes held in the word
 * pair (w1, w0) in place.
 *
 * Numbering the source bytes 7..0 with w1 = (7 6 5 4) and w0 = (3 2 1 0),
 * the result is w1 = (5 7 2 3) and w0 = (6 0 4 1). The diagrams on each line
 * show which source byte occupies which position after the step.
 *
 * Requires `w0`, `w1` and the scratch words `t0`, `t1`, `t2` (32-bit) in the
 * caller's scope; the scratch words are clobbered.
 */
#define PERMUTATION() \
/* permutation */ \
\
/* 7 6 5 4 3 2 1 0 */ \
/* 5 7 2 3 6 0 4 1 */ \
\
/* w0 (3 2 1 0) */ \
/* w1 (7 6 5 4) */ \
\
/* w0 (6 0 4 1) */ \
/* w1 (5 7 2 3) */ \
\
t0 = w1 << 8; /* 6 5 4 - */ \
t0 = t0 & 0xff00ff00; /* 6 - 4 - */ \
\
t1 = w1 << 16; /* 5 4 - - */ \
t1 = t1 & 0xff000000; /* 5 - - - */ \
\
t2 = w1 & 0xff000000; /* 7 - - - */ \
t2 = t2 >> 8; /* - 7 - - */ \
t1 = t1 ^ t2; /* 5 7 - - */ \
\
t2 = w0 & 0xff000000; /* 3 - - - */ \
t2 = t2 >> 24; /* - - - 3 */ \
t1 = t1 ^ t2; /* 5 7 - 3 */ \
\
w1 = w0 >> 8; /* - 3 2 1 */ \
w1 = w1 & 0x0000ff00; /* - - 2 - */ \
w1 = w1 ^ t1; /* 5 7 2 3 */ \
\
t2 = w0 & 0x0000ff00; /* - - 1 - */ \
t2 = t2 >> 8; /* - - - 1 */ \
t0 = t0 ^ t2; /* 6 - 4 1 */ \
\
w0 = w0 << 16; /* 1 0 - - */ \
w0 = w0 & 0x00ff0000; /* - 0 - - */ \
w0 = w0 ^ t0; /* 6 0 4 1 */
#endif
/******************************************************************************
* Copyright (c) 2020, NEC Corporation.
*
* THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND.
*
*****************************************************************************/
/*
* SKINNY-128-384
*
* load * AC(c0 c1) ^ TK3
* calc AC(c0 c1) ^ TK2 -> store
* ART(TK2)
*
* number of rounds : 40 or 56
*/
#include "skinny.h"
#ifdef ___ENABLE_DWORD_CAST
/*
 * PERMUTATION_TK2() (64-bit variant): one TK2 schedule step.
 *
 * 1. Permute the bytes of `dw` with PERMUTATION().
 * 2. Apply the TK2 LFSR to every byte of `dw`:
 *    (x7..x0) -> (x6 x5 x4 x3 x2 x1 x0 x7^x5).
 * 3. XOR the result with the 64-bit word at *tk3 and store it at *tk2.
 * 4. Advance tk2 and tk3 by two 64-bit words (16 bytes), i.e. skip the slot
 *    that belongs to the round of the other parity.
 *
 * Requires `dw`, `dt0`, `dt1`, `tk2` and `tk3` in the caller's scope.
 */
#define PERMUTATION_TK2() \
\
/* permutation */ \
\
PERMUTATION() \
\
/* LFSR(for TK2) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x6 x5 x4 x3 x2 x1 x0 x7^x5) */ \
dw = ((dw << 1) & 0xfefefefefefefefe) ^ \
(((dw >> 7) ^ (dw >> 5)) & 0x0101010101010101); \
\
/* Load TK3 */ \
/* TK2^TK3^AC(c0 c1) */ \
/* store */ \
*tk2 = dw ^ *tk3; \
tk2 += 2; \
tk3 += 2;
#ifndef ___SKINNY_LOOP
void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys)
{
uint64_t* tk2; // used in MACRO
uint64_t* tk3; // used in MACRO
uint64_t dt0; // used in MACRO
uint64_t dt1; // used in MACRO
uint64_t dw;
// odd
// load master key
// load master key
dw = *(uint64_t*)&roundKeys[16];
tk2 = (uint64_t*)&roundKeys[64];
#ifndef ___NUM_OF_ROUNDS_56
tk3 = (uint64_t*)&roundKeys[384];
#else
tk3 = (uint64_t*)&roundKeys[512];
#endif
// 1st round
*tk2 = dw ^ *tk3;
tk2 += 2;
tk3 += 2;
// 3rd,5th, ... ,37th,39th round
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
#ifdef ___NUM_OF_ROUNDS_56
// 41th,43th, ... ,51th,53th round
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
#endif
// even
// load master key
dw = *(uint64_t*)&roundKeys[24];
tk2 = (uint64_t*)&roundKeys[72];
#ifndef ___NUM_OF_ROUNDS_56
tk3 = (uint64_t*)&roundKeys[392];
#else
tk3 = (uint64_t*)&roundKeys[520];
#endif
// 2nd,4th, ... ,54th,56th round
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
#ifdef ___NUM_OF_ROUNDS_56
// 42nd,44th, ... ,54th,56th round
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
#endif
}
#else /* ___SKINNY_LOOP */
/*
 * Expand TK2 into the per-round key words (looped variant, 64-bit words).
 *
 * roundKeys is both input and output:
 *   - bytes 16..31 hold the 16-byte TK2 master key;
 *   - the words starting at offset 384 (512 for 56 rounds) are read as the
 *     TK3 contribution (per the macro comments already combined with the
 *     c0/c1 round constants);
 *   - the combined TK2^TK3 words are written starting at offset 64, one
 *     8-byte word per round, odd and even rounds interleaved.
 *
 * Odd rounds: round 1 is stored directly, then 19 (27 for 56 rounds)
 * PERMUTATION_TK2() steps produce rounds 3, 5, ...
 * Even rounds: 20 (28 for 56 rounds) PERMUTATION_TK2() steps produce
 * rounds 2, 4, ...
 */
void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys)
{
uint64_t* tk2; // used in MACRO
uint64_t* tk3; // used in MACRO
uint64_t dt0; // used in MACRO
uint64_t dt1; // used in MACRO
uint64_t dw;
// odd
// load master key
dw = *(uint64_t*)&roundKeys[16];
tk2 = (uint64_t*)&roundKeys[64];
#ifndef ___NUM_OF_ROUNDS_56
tk3 = (uint64_t*)&roundKeys[384];
#else
tk3 = (uint64_t*)&roundKeys[512];
#endif
// 1st round (no permutation / LFSR applied yet)
*tk2 = dw ^ *tk3;
tk2 += 2;
tk3 += 2;
// 3rd, 5th, ...
#ifndef ___NUM_OF_ROUNDS_56
for(int i=0;i<19;i++)
#else
for(int i=0;i<27;i++)
#endif
{
PERMUTATION_TK2();
}
// even
// load master key
dw = *(uint64_t*)&roundKeys[24];
tk2 = (uint64_t*)&roundKeys[72];
#ifndef ___NUM_OF_ROUNDS_56
tk3 = (uint64_t*)&roundKeys[392];
#else
tk3 = (uint64_t*)&roundKeys[520];
#endif
// 2nd, 4th, ...
#ifndef ___NUM_OF_ROUNDS_56
for(int i=0;i<20;i++)
#else
for(int i=0;i<28;i++)
#endif
{
PERMUTATION_TK2();
}
}
#endif /* ___SKINNY_LOOP */
#else /* ___ENABLE_DWORD_CAST */
/*
 * PERMUTATION_TK2() (32-bit variant): one TK2 schedule step.
 *
 * 1. Permute the bytes of the word pair (w1, w0) with PERMUTATION().
 * 2. Apply the TK2 LFSR to every byte of w0 and w1:
 *    (x7..x0) -> (x6 x5 x4 x3 x2 x1 x0 x7^x5).
 * 3. XOR w0 and w1 with the next two 32-bit words at tk3 and store the
 *    results at tk2 (both pointers advance by two words).
 * 4. Advance tk2 and tk3 by two more words, i.e. skip the 8-byte slot that
 *    belongs to the round of the other parity.
 *
 * Requires `w0`, `w1`, `t0`, `t1`, `t2`, `tk2` and `tk3` in the caller's
 * scope.
 */
#define PERMUTATION_TK2() \
\
/* permutation */ \
\
PERMUTATION() \
\
/* LFSR(for TK2) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x6 x5 x4 x3 x2 x1 x0 x7^x5) */ \
w0 = ((w0 << 1) & 0xfefefefe) ^ \
(((w0 >> 7) ^ (w0 >> 5)) & 0x01010101); \
w1 = ((w1 << 1) & 0xfefefefe) ^ \
(((w1 >> 7) ^ (w1 >> 5)) & 0x01010101); \
\
/* Load TK3 */ \
/* TK2^TK3^AC(c0 c1) */ \
/* store */ \
*tk2++ = w0 ^ *tk3++; \
*tk2++ = w1 ^ *tk3++; \
tk2 += 2; \
tk3 += 2;
#ifndef ___SKINNY_LOOP
void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys)
{
uint32_t* tk2; // used in MACRO
uint32_t* tk3; // used in MACRO
uint32_t t0; // used in MACRO
uint32_t t1; // used in MACRO
uint32_t t2; // used in MACRO
uint32_t w0;
uint32_t w1;
// odd
// load master key
w0 = *(uint32_t*)&roundKeys[16];
w1 = *(uint32_t*)&roundKeys[20];
tk2 = (uint32_t*)&roundKeys[64];
#ifndef ___NUM_OF_ROUNDS_56
tk3 = (uint32_t*)&roundKeys[384];
#else
tk3 = (uint32_t*)&roundKeys[512];
#endif
// 1st round
*tk2++ = w0 ^ *tk3++;
*tk2++ = w1 ^ *tk3++;
tk2 += 2;
tk3 += 2;
// 3rd,5th, ... ,37th,39th round
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
#ifdef ___NUM_OF_ROUNDS_56
// 41th,43th, ... ,51th,53th round
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
#endif
// even
// load master key
w0 = *(uint32_t*)&roundKeys[24];
w1 = *(uint32_t*)&roundKeys[28];
tk2 = (uint32_t*)&roundKeys[72];
#ifndef ___NUM_OF_ROUNDS_56
tk3 = (uint32_t*)&roundKeys[392];
#else
tk3 = (uint32_t*)&roundKeys[520];
#endif
// 2nd,4th, ... ,54th,56th round
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
#ifdef ___NUM_OF_ROUNDS_56
// 42nd,44th, ... ,54th,56th round
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
#endif
}
#else /* ___SKINNY_LOOP */
/*
 * TK2 round-key schedule, 32-bit word / looped variant.
 *
 * roundKeys is used both as input and output:
 *   - bytes 16..23 (odd pass) and 24..31 (even pass) seed w0/w1,
 *   - the words starting at byte 384 (512 with ___NUM_OF_ROUNDS_56) are
 *     XORed in through tk3,
 *   - results go out through tk2, starting at byte 64 (odd pass) and
 *     byte 72 (even pass); PERMUTATION_TK2() writes 8 bytes and skips 8.
 *
 * tk2, tk3, t0, t1, t2, w0 and w1 must keep these names: the macros expanded
 * below refer to them directly.
 */
void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys)
{
#ifdef ___NUM_OF_ROUNDS_56
    enum { tk3_offset = 512, odd_steps = 27, even_steps = 28 };
#else
    enum { tk3_offset = 384, odd_steps = 19, even_steps = 20 };
#endif
    uint32_t *tk2 = (uint32_t*)&roundKeys[64];          /* used in MACRO */
    uint32_t *tk3 = (uint32_t*)&roundKeys[tk3_offset];  /* used in MACRO */
    uint32_t t0;                                        /* used in MACRO */
    uint32_t t1;                                        /* used in MACRO */
    uint32_t t2;                                        /* used in MACRO */
    uint32_t w0 = *(uint32_t*)&roundKeys[16];
    uint32_t w1 = *(uint32_t*)&roundKeys[20];
    int step;

    /* ---- odd rounds ---- */

    /* round 1: store the seed words combined with tk3, then skip one slot */
    tk2[0] = w0 ^ tk3[0];
    tk2[1] = w1 ^ tk3[1];
    tk2 += 4;
    tk3 += 4;

    /* rounds 3, 5, ... */
    for (step = 0; step < odd_steps; ++step) {
        PERMUTATION_TK2();
    }

    /* ---- even rounds ---- */

    w0 = *(uint32_t*)&roundKeys[24];
    w1 = *(uint32_t*)&roundKeys[28];
    tk2 = (uint32_t*)&roundKeys[72];
    tk3 = (uint32_t*)&roundKeys[tk3_offset + 8];

    /* rounds 2, 4, ... */
    for (step = 0; step < even_steps; ++step) {
        PERMUTATION_TK2();
    }
}
#endif /* ___SKINNY_LOOP */
#endif /* ___ENABLE_DWORD_CAST */
/******************************************************************************
* Copyright (c) 2020, NEC Corporation.
*
* THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND.
*
*****************************************************************************/
/*
* SKINNY-128-384
*
* AC(c0 c1) ^ TK3 -> store
* ART(TK3)
*
* number of rounds : 40 or 56
*/
#include "skinny.h"
#ifdef ___ENABLE_DWORD_CAST
/*
 * One TK3 schedule step on a single 64-bit word (dw).
 *
 * Expands to a statement sequence; it expects dw, dt0 and tk3 (plus whatever
 * PERMUTATION() needs, which is defined elsewhere) in the calling scope.
 *
 *   c0Val, c1Val : the two round-constant values for this round. They are
 *                  pasted unparenthesised, so pass simple values only.
 *
 * Steps:
 *   1. PERMUTATION()        - defined outside this section.
 *   2. Byte-wise LFSR on dw: every byte is shifted right by one and its new
 *      bit 7 becomes (old bit 0) XOR (old bit 6).
 *   3. Store dw ^ c0Val ^ (c1Val << 40) to *tk3.
 *   4. Advance tk3 by two 64-bit words (one stored, one skipped).
 */
#define PERMUTATION_TK3(c0Val, c1Val) \
\
/* permutation */ \
\
PERMUTATION() \
\
/* LFSR(for TK3) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x0^x6 x7 x6 x5 x4 x3 x2 x1) */ \
dw = ((dw >> 1) & 0x7f7f7f7f7f7f7f7f) ^ \
(((dw << 7) ^ (dw << 1)) & 0x8080808080808080); \
\
/* K3^AC(c0 c1) */ \
/* store */ \
dt0 = dw ^ c0Val; \
*tk3 = dt0 ^ ((uint64_t)c1Val << 40); \
tk3 += 2;
#ifndef ___SKINNY_LOOP
/*
 * TK3 round-key schedule, 64-bit word variant, fully unrolled
 * (selected when ___SKINNY_LOOP is not defined).
 *
 * roundKeys is both input and output:
 *   - bytes 32..39 (odd pass) and 40..47 (even pass) are loaded into dw,
 *   - results are written through tk3 starting at byte 384 (odd pass) and
 *     byte 392 (even pass); with ___NUM_OF_ROUNDS_56 the bases are 512/520.
 *
 * The round constants are passed to PERMUTATION_TK3() as literals, one
 * (c0, c1) pair per round; round 1 uses the constant 0x01 directly.
 */
void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys)
{
uint64_t *tk3;
uint64_t dt0; // used in MACRO
uint64_t dt1; // used in MACRO
uint64_t dw;
// odd
// load master key
dw = *(uint64_t*)&roundKeys[32];
#ifndef ___NUM_OF_ROUNDS_56
tk3 = (uint64_t*)&roundKeys[384];
#else
tk3 = (uint64_t*)&roundKeys[512];
#endif
// 1st round (store, then skip the slot reserved for the even pass)
*tk3++ = dw ^ 0x01;
tk3 += 1;
// 3rd,5th, ... ,37th,39th round
PERMUTATION_TK3(0x7, 0x0);
PERMUTATION_TK3(0xf, 0x1);
PERMUTATION_TK3(0xd, 0x3);
PERMUTATION_TK3(0x7, 0x3);
PERMUTATION_TK3(0xe, 0x1);
PERMUTATION_TK3(0x9, 0x3);
PERMUTATION_TK3(0x7, 0x2);
PERMUTATION_TK3(0xd, 0x1);
PERMUTATION_TK3(0x5, 0x3);
PERMUTATION_TK3(0x6, 0x1);
PERMUTATION_TK3(0x8, 0x1);
PERMUTATION_TK3(0x1, 0x2);
PERMUTATION_TK3(0x5, 0x0);
PERMUTATION_TK3(0x7, 0x1);
PERMUTATION_TK3(0xc, 0x1);
PERMUTATION_TK3(0x1, 0x3);
PERMUTATION_TK3(0x6, 0x0);
PERMUTATION_TK3(0xb, 0x1);
PERMUTATION_TK3(0xd, 0x2);
#ifdef ___NUM_OF_ROUNDS_56
// 41st,43rd, ... ,53rd,55th round
PERMUTATION_TK3(0x4, 0x3);
PERMUTATION_TK3(0x2, 0x1);
PERMUTATION_TK3(0x8, 0x0);
PERMUTATION_TK3(0x2, 0x2);
PERMUTATION_TK3(0x9, 0x0);
PERMUTATION_TK3(0x6, 0x2);
PERMUTATION_TK3(0x9, 0x1);
PERMUTATION_TK3(0x5, 0x2);
#endif
// even
// load master key
dw = *(uint64_t*)&roundKeys[40];
#ifndef ___NUM_OF_ROUNDS_56
tk3 = (uint64_t*)&roundKeys[392];
#else
tk3 = (uint64_t*)&roundKeys[520];
#endif
// 2nd,4th, ... ,38th,40th round
PERMUTATION_TK3(0x3, 0x0);
PERMUTATION_TK3(0xf, 0x0);
PERMUTATION_TK3(0xe, 0x3);
PERMUTATION_TK3(0xb, 0x3);
PERMUTATION_TK3(0xf, 0x2);
PERMUTATION_TK3(0xc, 0x3);
PERMUTATION_TK3(0x3, 0x3);
PERMUTATION_TK3(0xe, 0x0);
PERMUTATION_TK3(0xa, 0x3);
PERMUTATION_TK3(0xb, 0x2);
PERMUTATION_TK3(0xc, 0x2);
PERMUTATION_TK3(0x0, 0x3);
PERMUTATION_TK3(0x2, 0x0);
PERMUTATION_TK3(0xb, 0x0);
PERMUTATION_TK3(0xe, 0x2);
PERMUTATION_TK3(0x8, 0x3);
PERMUTATION_TK3(0x3, 0x2);
PERMUTATION_TK3(0xd, 0x0);
PERMUTATION_TK3(0x6, 0x3);
PERMUTATION_TK3(0xa, 0x1);
#ifdef ___NUM_OF_ROUNDS_56
// 42nd,44th, ... ,54th,56th round
PERMUTATION_TK3(0x9, 0x2);
PERMUTATION_TK3(0x4, 0x2);
PERMUTATION_TK3(0x1, 0x1);
PERMUTATION_TK3(0x4, 0x0);
PERMUTATION_TK3(0x3, 0x1);
PERMUTATION_TK3(0xc, 0x0);
PERMUTATION_TK3(0x2, 0x3);
PERMUTATION_TK3(0xa, 0x0);
#endif
}
#else /* ___SKINNY_LOOP */
/*
 * TK3 round-key schedule, 64-bit word / looped variant.
 *
 *   roundKeys : in/out. Bytes 32..39 (odd pass) and 40..47 (even pass) seed
 *               dw; results are written from byte 384 (odd) / 392 (even),
 *               or 512 / 520 with ___NUM_OF_ROUNDS_56, 16 bytes apart.
 *   pRC       : round-constant table holding two bytes (c0, c1) per round,
 *               in round order. Only read.
 *
 * tk3, dt0, dt1 and dw keep their names because the macros expanded below
 * refer to them directly.
 */
void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys, unsigned char *pRC)
{
#ifdef ___NUM_OF_ROUNDS_56
    enum { tk3_offset = 512, odd_steps = 27, even_steps = 28 };
#else
    enum { tk3_offset = 384, odd_steps = 19, even_steps = 20 };
#endif
    uint64_t *tk3 = (uint64_t*)&roundKeys[tk3_offset];
    uint64_t dt0;                                       /* used in MACRO */
    uint64_t dt1;                                       /* used in MACRO */
    uint64_t dw = *(uint64_t*)&roundKeys[32];
    uint64_t rc_c0;
    uint64_t rc_c1;
    const unsigned char *rc;
    int step;

    /* ---- odd rounds ---- */

    /* round 1: constant c0 = 0x01, c1 = 0 */
    tk3[0] = dw ^ 0x01;
    tk3 += 2;

    /* rounds 3, 5, ...: constants live at byte offsets 4, 8, 12, ... */
    rc = pRC + 4;
    for (step = 0; step < odd_steps; ++step) {
        rc_c0 = rc[0];
        rc_c1 = rc[1];
        rc += 4;
        PERMUTATION_TK3(rc_c0, rc_c1);
    }

    /* ---- even rounds ---- */

    dw = *(uint64_t*)&roundKeys[40];
    tk3 = (uint64_t*)&roundKeys[tk3_offset + 8];

    /* rounds 2, 4, ...: constants live at byte offsets 2, 6, 10, ... */
    rc = pRC + 2;
    for (step = 0; step < even_steps; ++step) {
        rc_c0 = rc[0];
        rc_c1 = rc[1];
        rc += 4;
        PERMUTATION_TK3(rc_c0, rc_c1);
    }
}
#endif /* ___SKINNY_LOOP */
#else /* ___ENABLE_DWORD_CAST */
/*
 * One TK3 schedule step on two 32-bit words (w0, w1).
 *
 * Expands to a statement sequence; it expects w0, w1 and tk3 (plus whatever
 * PERMUTATION() needs, which is defined elsewhere) in the calling scope.
 *
 *   c0Val, c1Val : the two round-constant values for this round. They are
 *                  pasted unparenthesised, so pass simple values only.
 *
 * Steps:
 *   1. PERMUTATION()        - defined outside this section.
 *   2. Byte-wise LFSR on w0 and w1: every byte is shifted right by one and
 *      its new bit 7 becomes (old bit 0) XOR (old bit 6).
 *   3. Store w0 ^ c0Val and w1 ^ (c1Val << 8) to tk3[0], tk3[1].
 *   4. Advance tk3 by four words in total (two stored, two skipped).
 */
#define PERMUTATION_TK3(c0Val, c1Val) \
\
/* permutation */ \
\
PERMUTATION() \
\
/* LFSR(for TK3) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x0^x6 x7 x6 x5 x4 x3 x2 x1) */ \
w0 = ((w0 >> 1) & 0x7f7f7f7f) ^ \
(((w0 << 7) ^ (w0 << 1)) & 0x80808080); \
w1 = ((w1 >> 1) & 0x7f7f7f7f) ^ \
(((w1 << 7) ^ (w1 << 1)) & 0x80808080); \
\
/* K3^AC(c0 c1) */ \
/* store */ \
*tk3++ = w0 ^ c0Val; \
*tk3++ = w1 ^ ((uint32_t)c1Val << 8); \
tk3 += 2;
#ifndef ___SKINNY_LOOP
/*
 * TK3 round-key schedule, 32-bit word variant, fully unrolled
 * (selected when ___SKINNY_LOOP is not defined).
 *
 * roundKeys is both input and output:
 *   - bytes 32..39 (odd pass) and 40..47 (even pass) are loaded into w0/w1,
 *   - results are written through tk3 starting at byte 384 (odd pass) and
 *     byte 392 (even pass); with ___NUM_OF_ROUNDS_56 the bases are 512/520.
 *
 * The round constants are passed to PERMUTATION_TK3() as literals, one
 * (c0, c1) pair per round; round 1 uses the constant 0x01 directly.
 */
void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys)
{
uint32_t *tk3;
uint32_t t0; // used in MACRO
uint32_t t1; // used in MACRO
uint32_t t2; // used in MACRO
uint32_t w0;
uint32_t w1;
// odd
// load master key
w0 = *(uint32_t*)&roundKeys[32];
w1 = *(uint32_t*)&roundKeys[36];
#ifndef ___NUM_OF_ROUNDS_56
tk3 = (uint32_t*)&roundKeys[384];
#else
tk3 = (uint32_t*)&roundKeys[512];
#endif
// 1st round (store, then skip the slot reserved for the even pass)
*tk3++ = w0 ^ 0x01;
*tk3++ = w1;
tk3 += 2;
// 3rd,5th, ... ,37th,39th round
PERMUTATION_TK3(0x7, 0x0);
PERMUTATION_TK3(0xf, 0x1);
PERMUTATION_TK3(0xd, 0x3);
PERMUTATION_TK3(0x7, 0x3);
PERMUTATION_TK3(0xe, 0x1);
PERMUTATION_TK3(0x9, 0x3);
PERMUTATION_TK3(0x7, 0x2);
PERMUTATION_TK3(0xd, 0x1);
PERMUTATION_TK3(0x5, 0x3);
PERMUTATION_TK3(0x6, 0x1);
PERMUTATION_TK3(0x8, 0x1);
PERMUTATION_TK3(0x1, 0x2);
PERMUTATION_TK3(0x5, 0x0);
PERMUTATION_TK3(0x7, 0x1);
PERMUTATION_TK3(0xc, 0x1);
PERMUTATION_TK3(0x1, 0x3);
PERMUTATION_TK3(0x6, 0x0);
PERMUTATION_TK3(0xb, 0x1);
PERMUTATION_TK3(0xd, 0x2);
#ifdef ___NUM_OF_ROUNDS_56
// 41st,43rd, ... ,53rd,55th round
PERMUTATION_TK3(0x4, 0x3);
PERMUTATION_TK3(0x2, 0x1);
PERMUTATION_TK3(0x8, 0x0);
PERMUTATION_TK3(0x2, 0x2);
PERMUTATION_TK3(0x9, 0x0);
PERMUTATION_TK3(0x6, 0x2);
PERMUTATION_TK3(0x9, 0x1);
PERMUTATION_TK3(0x5, 0x2);
#endif
// even
// load master key
w0 = *(uint32_t*)&roundKeys[40];
w1 = *(uint32_t*)&roundKeys[44];
#ifndef ___NUM_OF_ROUNDS_56
tk3 = (uint32_t*)&roundKeys[392];
#else
tk3 = (uint32_t*)&roundKeys[520];
#endif
// 2nd,4th, ... ,38th,40th round
PERMUTATION_TK3(0x3, 0x0);
PERMUTATION_TK3(0xf, 0x0);
PERMUTATION_TK3(0xe, 0x3);
PERMUTATION_TK3(0xb, 0x3);
PERMUTATION_TK3(0xf, 0x2);
PERMUTATION_TK3(0xc, 0x3);
PERMUTATION_TK3(0x3, 0x3);
PERMUTATION_TK3(0xe, 0x0);
PERMUTATION_TK3(0xa, 0x3);
PERMUTATION_TK3(0xb, 0x2);
PERMUTATION_TK3(0xc, 0x2);
PERMUTATION_TK3(0x0, 0x3);
PERMUTATION_TK3(0x2, 0x0);
PERMUTATION_TK3(0xb, 0x0);
PERMUTATION_TK3(0xe, 0x2);
PERMUTATION_TK3(0x8, 0x3);
PERMUTATION_TK3(0x3, 0x2);
PERMUTATION_TK3(0xd, 0x0);
PERMUTATION_TK3(0x6, 0x3);
PERMUTATION_TK3(0xa, 0x1);
#ifdef ___NUM_OF_ROUNDS_56
// 42nd,44th, ... ,54th,56th round
PERMUTATION_TK3(0x9, 0x2);
PERMUTATION_TK3(0x4, 0x2);
PERMUTATION_TK3(0x1, 0x1);
PERMUTATION_TK3(0x4, 0x0);
PERMUTATION_TK3(0x3, 0x1);
PERMUTATION_TK3(0xc, 0x0);
PERMUTATION_TK3(0x2, 0x3);
PERMUTATION_TK3(0xa, 0x0);
#endif
}
#else /* ___SKINNY_LOOP */
/*
 * TK3 round-key schedule, 32-bit word / looped variant.
 *
 *   roundKeys : in/out. Bytes 32..39 (odd pass) and 40..47 (even pass) seed
 *               w0/w1; results are written from byte 384 (odd) / 392 (even),
 *               or 512 / 520 with ___NUM_OF_ROUNDS_56, 16 bytes apart.
 *   pRC       : round-constant table holding two bytes (c0, c1) per round,
 *               in round order. Only read.
 *
 * tk3, t0, t1, t2, w0 and w1 keep their names because the macros expanded
 * below refer to them directly.
 */
void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys, unsigned char *pRC)
{
#ifdef ___NUM_OF_ROUNDS_56
    enum { tk3_offset = 512, odd_steps = 27, even_steps = 28 };
#else
    enum { tk3_offset = 384, odd_steps = 19, even_steps = 20 };
#endif
    uint32_t *tk3 = (uint32_t*)&roundKeys[tk3_offset];
    uint32_t t0;                                        /* used in MACRO */
    uint32_t t1;                                        /* used in MACRO */
    uint32_t t2;                                        /* used in MACRO */
    uint32_t w0 = *(uint32_t*)&roundKeys[32];
    uint32_t w1 = *(uint32_t*)&roundKeys[36];
    uint32_t rc_c0;
    uint32_t rc_c1;
    const unsigned char *rc;
    int step;

    /* ---- odd rounds ---- */

    /* round 1: constant c0 = 0x01, c1 = 0 */
    tk3[0] = w0 ^ 0x01;
    tk3[1] = w1;
    tk3 += 4;

    /* rounds 3, 5, ...: constants live at byte offsets 4, 8, 12, ... */
    rc = pRC + 4;
    for (step = 0; step < odd_steps; ++step) {
        rc_c0 = rc[0];
        rc_c1 = rc[1];
        rc += 4;
        PERMUTATION_TK3(rc_c0, rc_c1);
    }

    /* ---- even rounds ---- */

    w0 = *(uint32_t*)&roundKeys[40];
    w1 = *(uint32_t*)&roundKeys[44];
    tk3 = (uint32_t*)&roundKeys[tk3_offset + 8];

    /* rounds 2, 4, ...: constants live at byte offsets 2, 6, 10, ... */
    rc = pRC + 2;
    for (step = 0; step < even_steps; ++step) {
        rc_c0 = rc[0];
        rc_c1 = rc[1];
        rc += 4;
        PERMUTATION_TK3(rc_c0, rc_c1);
    }
}
#endif /* ___SKINNY_LOOP */
#endif /* ___ENABLE_DWORD_CAST */
/******************************************************************************
* Copyright (c) 2020, NEC Corporation.
*
* THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND.
*
*****************************************************************************/
/*
* SKINNY-128-384
*
* ART(TK1) -> store
* load AC(c0 c1) ^ TK3 ^ TK2
* load TK1
* calc AC(c0 c1) ^ TK3 ^ TK2 ^ TK1 -> use at (AC->ART)
* SC->SR->(AC->ART)->MC
*
* number of rounds : 40 or 56
*/
#include "skinny.h"
/*
 * S-BOX
 *
 * 256-entry byte substitution table, indexed by the input byte.
 * It is handed to Encrypt() as its `sbox` argument and consulted by the
 * SBOX_0 / SBOX_8 / SBOX_16 / SBOX_24 macros.
 */
unsigned char SBOX[]
= {
// Original
0x65, 0x4c, 0x6a, 0x42, 0x4b, 0x63, 0x43, 0x6b, 0x55, 0x75, 0x5a, 0x7a, 0x53, 0x73, 0x5b, 0x7b,
0x35, 0x8c, 0x3a, 0x81, 0x89, 0x33, 0x80, 0x3b, 0x95, 0x25, 0x98, 0x2a, 0x90, 0x23, 0x99, 0x2b,
0xe5, 0xcc, 0xe8, 0xc1, 0xc9, 0xe0, 0xc0, 0xe9, 0xd5, 0xf5, 0xd8, 0xf8, 0xd0, 0xf0, 0xd9, 0xf9,
0xa5, 0x1c, 0xa8, 0x12, 0x1b, 0xa0, 0x13, 0xa9, 0x05, 0xb5, 0x0a, 0xb8, 0x03, 0xb0, 0x0b, 0xb9,
0x32, 0x88, 0x3c, 0x85, 0x8d, 0x34, 0x84, 0x3d, 0x91, 0x22, 0x9c, 0x2c, 0x94, 0x24, 0x9d, 0x2d,
0x62, 0x4a, 0x6c, 0x45, 0x4d, 0x64, 0x44, 0x6d, 0x52, 0x72, 0x5c, 0x7c, 0x54, 0x74, 0x5d, 0x7d,
0xa1, 0x1a, 0xac, 0x15, 0x1d, 0xa4, 0x14, 0xad, 0x02, 0xb1, 0x0c, 0xbc, 0x04, 0xb4, 0x0d, 0xbd,
0xe1, 0xc8, 0xec, 0xc5, 0xcd, 0xe4, 0xc4, 0xed, 0xd1, 0xf1, 0xdc, 0xfc, 0xd4, 0xf4, 0xdd, 0xfd,
0x36, 0x8e, 0x38, 0x82, 0x8b, 0x30, 0x83, 0x39, 0x96, 0x26, 0x9a, 0x28, 0x93, 0x20, 0x9b, 0x29,
0x66, 0x4e, 0x68, 0x41, 0x49, 0x60, 0x40, 0x69, 0x56, 0x76, 0x58, 0x78, 0x50, 0x70, 0x59, 0x79,
0xa6, 0x1e, 0xaa, 0x11, 0x19, 0xa3, 0x10, 0xab, 0x06, 0xb6, 0x08, 0xba, 0x00, 0xb3, 0x09, 0xbb,
0xe6, 0xce, 0xea, 0xc2, 0xcb, 0xe3, 0xc3, 0xeb, 0xd6, 0xf6, 0xda, 0xfa, 0xd3, 0xf3, 0xdb, 0xfb,
0x31, 0x8a, 0x3e, 0x86, 0x8f, 0x37, 0x87, 0x3f, 0x92, 0x21, 0x9e, 0x2e, 0x97, 0x27, 0x9f, 0x2f,
0x61, 0x48, 0x6e, 0x46, 0x4f, 0x67, 0x47, 0x6f, 0x51, 0x71, 0x5e, 0x7e, 0x57, 0x77, 0x5f, 0x7f,
0xa2, 0x18, 0xae, 0x16, 0x1f, 0xa7, 0x17, 0xaf, 0x01, 0xb2, 0x0e, 0xbe, 0x07, 0xb7, 0x0f, 0xbf,
0xe2, 0xca, 0xee, 0xc6, 0xcf, 0xe7, 0xc7, 0xef, 0xd2, 0xf2, 0xde, 0xfe, 0xd7, 0xf7, 0xdf, 0xff,
};
/*
 * S-BOX ^ AC(c2)
 *
 * Same layout as SBOX; per the annotation below every entry is the matching
 * SBOX entry XORed with the constant c2 = 0x02, so one lookup performs the
 * substitution and that constant addition together.
 * It is handed to Encrypt() as `sbox2` and used by SBOX_16 for its first
 * byte only.
 */
unsigned char SBOX2[]
= { // Original ^ c2(0x02)
0x67, 0x4e, 0x68, 0x40, 0x49, 0x61, 0x41, 0x69, 0x57, 0x77, 0x58, 0x78, 0x51, 0x71, 0x59, 0x79,
0x37, 0x8e, 0x38, 0x83, 0x8b, 0x31, 0x82, 0x39, 0x97, 0x27, 0x9a, 0x28, 0x92, 0x21, 0x9b, 0x29,
0xe7, 0xce, 0xea, 0xc3, 0xcb, 0xe2, 0xc2, 0xeb, 0xd7, 0xf7, 0xda, 0xfa, 0xd2, 0xf2, 0xdb, 0xfb,
0xa7, 0x1e, 0xaa, 0x10, 0x19, 0xa2, 0x11, 0xab, 0x07, 0xb7, 0x08, 0xba, 0x01, 0xb2, 0x09, 0xbb,
0x30, 0x8a, 0x3e, 0x87, 0x8f, 0x36, 0x86, 0x3f, 0x93, 0x20, 0x9e, 0x2e, 0x96, 0x26, 0x9f, 0x2f,
0x60, 0x48, 0x6e, 0x47, 0x4f, 0x66, 0x46, 0x6f, 0x50, 0x70, 0x5e, 0x7e, 0x56, 0x76, 0x5f, 0x7f,
0xa3, 0x18, 0xae, 0x17, 0x1f, 0xa6, 0x16, 0xaf, 0x00, 0xb3, 0x0e, 0xbe, 0x06, 0xb6, 0x0f, 0xbf,
0xe3, 0xca, 0xee, 0xc7, 0xcf, 0xe6, 0xc6, 0xef, 0xd3, 0xf3, 0xde, 0xfe, 0xd6, 0xf6, 0xdf, 0xff,
0x34, 0x8c, 0x3a, 0x80, 0x89, 0x32, 0x81, 0x3b, 0x94, 0x24, 0x98, 0x2a, 0x91, 0x22, 0x99, 0x2b,
0x64, 0x4c, 0x6a, 0x43, 0x4b, 0x62, 0x42, 0x6b, 0x54, 0x74, 0x5a, 0x7a, 0x52, 0x72, 0x5b, 0x7b,
0xa4, 0x1c, 0xa8, 0x13, 0x1b, 0xa1, 0x12, 0xa9, 0x04, 0xb4, 0x0a, 0xb8, 0x02, 0xb1, 0x0b, 0xb9,
0xe4, 0xcc, 0xe8, 0xc0, 0xc9, 0xe1, 0xc1, 0xe9, 0xd4, 0xf4, 0xd8, 0xf8, 0xd1, 0xf1, 0xd9, 0xf9,
0x33, 0x88, 0x3c, 0x84, 0x8d, 0x35, 0x85, 0x3d, 0x90, 0x23, 0x9c, 0x2c, 0x95, 0x25, 0x9d, 0x2d,
0x63, 0x4a, 0x6c, 0x44, 0x4d, 0x65, 0x45, 0x6d, 0x53, 0x73, 0x5c, 0x7c, 0x55, 0x75, 0x5d, 0x7d,
0xa0, 0x1a, 0xac, 0x14, 0x1d, 0xa5, 0x15, 0xad, 0x03, 0xb0, 0x0c, 0xbc, 0x05, 0xb5, 0x0d, 0xbd,
0xe0, 0xc8, 0xec, 0xc4, 0xcd, 0xe5, 0xc5, 0xed, 0xd0, 0xf0, 0xdc, 0xfc, 0xd5, 0xf5, 0xdd, 0xfd,
};
#ifdef ___SKINNY_LOOP
/*
 * Round Constants
 *
 * Only built for the looped key schedule (___SKINNY_LOOP); the unrolled
 * schedule passes the same values as literals instead.
 *
 * Layout: two bytes per round, (c0, c1), in round order starting with
 * round 1. 40 rounds occupy 80 bytes; ___NUM_OF_ROUNDS_56 appends 32 more
 * bytes for rounds 41..56.
 * The looped RunEncryptionKeyScheduleTK3() walks this table with a stride
 * of 4 bytes, starting at offset 4 for odd rounds and offset 2 for even
 * rounds.
 */
unsigned char RC[]
= {
0x01, 0x00, 0x03, 0x00, 0x07, 0x00, 0x0f, 0x00, 0x0f, 0x01, 0x0e, 0x03, 0x0d, 0x03, 0x0b, 0x03,
0x07, 0x03, 0x0f, 0x02, 0x0e, 0x01, 0x0c, 0x03, 0x09, 0x03, 0x03, 0x03, 0x07, 0x02, 0x0e, 0x00,
0x0d, 0x01, 0x0a, 0x03, 0x05, 0x03, 0x0b, 0x02, 0x06, 0x01, 0x0c, 0x02, 0x08, 0x01, 0x00, 0x03,
0x01, 0x02, 0x02, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x07, 0x01, 0x0e, 0x02, 0x0c, 0x01, 0x08, 0x03,
0x01, 0x03, 0x03, 0x02, 0x06, 0x00, 0x0d, 0x00, 0x0b, 0x01, 0x06, 0x03, 0x0d, 0x02, 0x0a, 0x01,
#ifdef ___NUM_OF_ROUNDS_56
0x04, 0x03, 0x09, 0x02, 0x02, 0x01, 0x04, 0x02, 0x08, 0x00, 0x01, 0x01, 0x02, 0x02, 0x04, 0x00,
0x09, 0x00, 0x03, 0x01, 0x06, 0x02, 0x0c, 0x00, 0x09, 0x01, 0x02, 0x03, 0x05, 0x02, 0x0a, 0x00,
#endif
};
#endif
extern void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2);
extern void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys);
#ifdef ___SKINNY_LOOP
extern void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys, unsigned char *pRC);
#else
extern void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys);
#endif
/*
 * Encrypt one block after (re)loading all three tweakey parts.
 *
 *   input        : 16-byte block, encrypted in place by Encrypt().
 *   pskinny_ctrl : holds the roundKeys work buffer and the function pointer
 *                  that selects the next variant to call.
 *   CNT          : counter; bytes 0..7 are loaded into roundKeys[0..7].
 *   T            : 16 bytes loaded into roundKeys[16..31] (TK2 schedule input).
 *   K            : 16 bytes loaded into roundKeys[32..47] (TK3 schedule input).
 *
 * The second 32-bit word of every 8-byte half is stored through pack_word()
 * with its source bytes in the order 7,4,5,6 (resp. 15,12,13,14).
 *
 * After running both key schedules and the cipher, the control block is
 * switched to skinny_128_384_enc12_12, which skips the TK3 schedule.
 */
void skinny_128_384_enc123_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K)
{
    uint32_t *tweakey = (uint32_t*)&pskinny_ctrl->roundKeys[0];

    /* counter -> words 0,1 */
    tweakey[0] = *(uint32_t*)(CNT);
    pack_word(CNT[7], CNT[4], CNT[5], CNT[6], tweakey[1]);

    /* T -> words 4..7 */
    tweakey[4] = *(uint32_t*)(T);
    pack_word(T[7], T[4], T[5], T[6], tweakey[5]);
    tweakey[6] = *(uint32_t*)(T + 8);
    pack_word(T[15], T[12], T[13], T[14], tweakey[7]);

    /* K -> words 8..11 */
    tweakey[8] = *(uint32_t*)(K);
    pack_word(K[7], K[4], K[5], K[6], tweakey[9]);
    tweakey[10] = *(uint32_t*)(K + 8);
    pack_word(K[15], K[12], K[13], K[14], tweakey[11]);

    /* TK3 first: the TK2 schedule reads what the TK3 schedule stored */
#ifndef ___SKINNY_LOOP
    RunEncryptionKeyScheduleTK3(pskinny_ctrl->roundKeys);
#else
    RunEncryptionKeyScheduleTK3(pskinny_ctrl->roundKeys, RC);
#endif
    RunEncryptionKeyScheduleTK2(pskinny_ctrl->roundKeys);

    Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2);

    /* K is now absorbed; later calls only need to refresh CNT and T */
    pskinny_ctrl->func_skinny_128_384_enc = skinny_128_384_enc12_12;
}
/*
 * Encrypt one block after reloading the counter and T only.
 *
 *   input        : 16-byte block, encrypted in place by Encrypt().
 *   pskinny_ctrl : holds the roundKeys work buffer; the TK3 part written by
 *                  an earlier skinny_128_384_enc123_12 call is reused.
 *   CNT          : counter; bytes 0..7 are loaded into roundKeys[0..7].
 *   T            : 16 bytes loaded into roundKeys[16..31] (TK2 schedule input).
 *   K            : unused, kept so all variants share one signature.
 */
void skinny_128_384_enc12_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K)
{
    (void)K;

    /* Explicit cast, as in skinny_128_384_enc123_12: roundKeys is handed to
     * the schedule functions as a byte buffer, so its element address is not
     * implicitly convertible to uint32_t*. */
    uint32_t *pt = (uint32_t*)&pskinny_ctrl->roundKeys[0];

    /* counter -> words 0,1 */
    pt[0] = *(uint32_t*)(&CNT[0]);
    pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]);

    /* T -> words 4..7 */
    pt[4] = *(uint32_t*)(&T[0]);
    pack_word(T[7], T[4], T[5], T[6], pt[5]);
    pt[6] = *(uint32_t*)(&T[8]);
    pack_word(T[15], T[12], T[13], T[14], pt[7]);

    RunEncryptionKeyScheduleTK2(pskinny_ctrl->roundKeys);
    Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2);
}
/*
 * Encrypt one block after reloading the counter only.
 *
 *   input        : 16-byte block, encrypted in place by Encrypt().
 *   pskinny_ctrl : holds the roundKeys work buffer; everything except
 *                  roundKeys[0..7] is reused from earlier calls.
 *   CNT          : counter; bytes 0..7 are loaded into roundKeys[0..7].
 *   T, K         : unused, kept so all variants share one signature.
 *
 * The redundant `extern` on the definition was dropped (linkage is
 * unchanged) to match the sibling variants.
 */
void skinny_128_384_enc1_1 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K)
{
    (void)T;
    (void)K;

    /* Explicit cast, as in skinny_128_384_enc123_12: roundKeys is handed to
     * Encrypt() as a byte buffer, so its element address is not implicitly
     * convertible to uint32_t*. */
    uint32_t *pt = (uint32_t*)&pskinny_ctrl->roundKeys[0];

    /* counter -> words 0,1 */
    pt[0] = *(uint32_t*)(&CNT[0]);
    pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]);

    Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2);
}
/*
 * Expand the 8 TK1 bytes at roundKeys[0..7] into seven further 8-byte
 * arrangements, written back-to-back to roundKeys[8..63].
 *
 * Expands to a self-contained { } block that only needs `roundKeys` (a byte
 * pointer) in the calling scope. The source bytes are copied into temporaries
 * first, then each group of eight stores emits one fixed rearrangement of
 * them; no byte value is modified, only its position.
 * Encrypt() later walks roundKeys[0..63] through its tk1 pointer.
 */
#define PERMUTATION_TK1() \
\
/* permutation */ \
{ \
unsigned char tmp0 = roundKeys[0]; \
unsigned char tmp1 = roundKeys[1]; \
unsigned char tmp2 = roundKeys[2]; \
unsigned char tmp3 = roundKeys[3]; \
unsigned char tmp4 = roundKeys[4]; \
unsigned char tmp5 = roundKeys[5]; \
unsigned char tmp6 = roundKeys[6]; \
unsigned char tmp7 = roundKeys[7]; \
\
unsigned char* dst = &roundKeys[8]; \
\
/* 5 7 2 3 6 0 4 1 */ \
*dst++ = tmp1; \
*dst++ = tmp4; \
*dst++ = tmp0; \
*dst++ = tmp6; \
*dst++ = tmp3; \
*dst++ = tmp2; \
*dst++ = tmp7; \
*dst++ = tmp5; \
\
/* 2 5 0 6 7 1 3 4 */ \
*dst++ = tmp4; \
*dst++ = tmp3; \
*dst++ = tmp1; \
*dst++ = tmp7; \
*dst++ = tmp6; \
*dst++ = tmp0; \
*dst++ = tmp5; \
*dst++ = tmp2; \
\
/* 0 2 1 7 5 4 6 3 */ \
*dst++ = tmp3; \
*dst++ = tmp6; \
*dst++ = tmp4; \
*dst++ = tmp5; \
*dst++ = tmp7; \
*dst++ = tmp1; \
*dst++ = tmp2; \
*dst++ = tmp0; \
\
/* 1 0 4 5 2 3 7 6 */ \
*dst++ = tmp6; \
*dst++ = tmp7; \
*dst++ = tmp3; \
*dst++ = tmp2; \
*dst++ = tmp5; \
*dst++ = tmp4; \
*dst++ = tmp0; \
*dst++ = tmp1; \
\
/* 4 1 3 2 0 6 5 7 */ \
*dst++ = tmp7; \
*dst++ = tmp5; \
*dst++ = tmp6; \
*dst++ = tmp0; \
*dst++ = tmp2; \
*dst++ = tmp3; \
*dst++ = tmp1; \
*dst++ = tmp4; \
\
/* 3 4 6 0 1 7 2 5 */ \
*dst++ = tmp5; \
*dst++ = tmp2; \
*dst++ = tmp7; \
*dst++ = tmp1; \
*dst++ = tmp0; \
*dst++ = tmp6; \
*dst++ = tmp4; \
*dst++ = tmp3; \
\
/* 6 3 7 1 4 5 0 2 */ \
*dst++ = tmp2; \
*dst++ = tmp0; \
*dst++ = tmp5; \
*dst++ = tmp4; \
*dst++ = tmp1; \
*dst++ = tmp7; \
*dst++ = tmp3; \
*dst++ = tmp6; \
}
/*
 * Substitute four bytes through `sbox` and store them back in the same
 * positions (row shifted by 0 bytes).
 *
 * Needs `sbox` and the scratch variables t0..t3 in the calling scope; the
 * arguments must be byte lvalues. All four lookups happen before any store.
 */
#define SBOX_0(x0, x1, x2, x3)                                          \
{                                                                       \
    t0 = sbox[x0];  t1 = sbox[x1];  t2 = sbox[x2];  t3 = sbox[x3];      \
                                                                        \
    x0 = (uint8_t)t0;                                                   \
    x1 = (uint8_t)t1;                                                   \
    x2 = (uint8_t)t2;                                                   \
    x3 = (uint8_t)t3;                                                   \
}
/*
 * Substitute four bytes through `sbox` and store them rotated by one
 * position: the result for x3 lands in x0, x0 in x1, x1 in x2, x2 in x3.
 *
 * Needs `sbox` and the scratch variables t0..t3 in the calling scope; the
 * arguments must be byte lvalues. All four lookups happen before any store.
 */
#define SBOX_8(x0, x1, x2, x3)                                          \
{                                                                       \
    t0 = sbox[x0];  t1 = sbox[x1];  t2 = sbox[x2];  t3 = sbox[x3];      \
                                                                        \
    x0 = (uint8_t)t3;                                                   \
    x1 = (uint8_t)t0;                                                   \
    x2 = (uint8_t)t1;                                                   \
    x3 = (uint8_t)t2;                                                   \
}
/*
 * Substitute four bytes and store them rotated by two positions: the
 * results for x2, x3, x0, x1 land in x0, x1, x2, x3 respectively.
 *
 * The first byte is looked up in `sbox2` (the table with the AC(c2)
 * constant folded in), the other three in `sbox`.
 *
 * Needs `sbox`, `sbox2` and the scratch variables t0..t3 in the calling
 * scope; the arguments must be byte lvalues. All lookups precede any store.
 */
#define SBOX_16(x0, x1, x2, x3)                                         \
{                                                                       \
    t0 = sbox2[x0]; /* AC(c2) */                                        \
    t1 = sbox[x1];  t2 = sbox[x2];  t3 = sbox[x3];                      \
                                                                        \
    x0 = (uint8_t)t2;                                                   \
    x1 = (uint8_t)t3;                                                   \
    x2 = (uint8_t)t0;                                                   \
    x3 = (uint8_t)t1;                                                   \
}
/*
 * Substitute four bytes through `sbox` and store them rotated by three
 * positions: the results for x1, x2, x3, x0 land in x0, x1, x2, x3.
 *
 * Needs `sbox` and the scratch variables t0..t3 in the calling scope; the
 * arguments must be byte lvalues. All four lookups happen before any store.
 */
#define SBOX_24(x0, x1, x2, x3)                                         \
{                                                                       \
    t0 = sbox[x0];  t1 = sbox[x1];  t2 = sbox[x2];  t3 = sbox[x3];      \
                                                                        \
    x0 = (uint8_t)t1;                                                   \
    x1 = (uint8_t)t2;                                                   \
    x2 = (uint8_t)t3;                                                   \
    x3 = (uint8_t)t0;                                                   \
}
#ifdef ___ENABLE_DWORD_CAST
/*
 * Two consecutive cipher rounds (one "odd", one "even") on the 16-byte
 * `block`, 64-bit word variant.
 *
 * Expects in the calling scope: block, sbox, sbox2, the scratch variables
 * t0..t3 (64-bit) and the round-key cursors tk1 and tk2 (uint64_t*).
 *
 * Per round:
 *   - the four SBOX_n macros substitute all 16 bytes, each group of four
 *     being stored with its own rotation; SBOX_16 uses sbox2 for one byte;
 *   - bytes 0..7 are XORed with round-key material: *tk1 and *tk2 in the odd
 *     round, only *tk2 in the even round. So each expansion consumes one
 *     tk1 word and two tk2 words;
 *   - the four 32-bit rows r0..r3 (after key addition) are recombined as
 *       new r0 = r0 ^ r2 ^ r3,  new r1 = r0,
 *       new r2 = r1 ^ r2,       new r3 = r0 ^ r2
 *     and stored back; only the low 32 bits of each 64-bit temporary are
 *     written.
 */
#define SKINNY_MAIN() \
{ \
\
/* odd */ \
\
/* LUT(with ShiftRows & AC(c2))*/ \
\
SBOX_0( block[0], block[1], block[2], block[3]); \
SBOX_8( block[4], block[5], block[6], block[7]); \
SBOX_16(block[8], block[9], block[10], block[11]); \
SBOX_24(block[12], block[13], block[14], block[15]); \
\
/* TK1^TK2^TK3^AC(c0 c1) */ \
\
t1 = *(uint64_t*)&block[0]; \
t1 ^= *tk1++; \
t1 ^= *tk2++; \
\
/* MC */ \
\
t2 = *(uint64_t*)&block[8]; \
t0 = t2 >> 32; \
\
/* 0^2 */ \
t3 = t1 ^ t2; \
\
/* 1^2 */ \
t2 = (t1 >> 32) ^ t2; \
\
/* 0^2^3 */ \
t0 = t0 ^ t3; \
\
*(uint32_t*)&block[0] = (uint32_t)t0; \
*(uint32_t*)&block[4] = (uint32_t)t1; \
*(uint32_t*)&block[8] = (uint32_t)t2; \
*(uint32_t*)&block[12] = (uint32_t)t3; \
\
/* even */ \
\
/* LUT(with ShiftRows & AC(c2))*/ \
\
SBOX_0( block[0], block[1], block[2], block[3]); \
SBOX_8( block[4], block[5], block[6], block[7]); \
SBOX_16(block[8], block[9], block[10], block[11]); \
SBOX_24(block[12], block[13], block[14], block[15]); \
\
/* TK2^TK3^AC(c0 c1) */ \
\
t1 = *(uint64_t*)&block[0]; \
t1 ^= *tk2++; \
\
/* MC */ \
\
t2 = *(uint64_t*)&block[8]; \
t0 = t2 >> 32; \
\
/* 0^2 */ \
t3 = t1 ^ t2; \
\
/* 1^2 */ \
t2 = (t1 >> 32) ^ t2; \
\
/* 0^2^3 */ \
t0 = t0 ^ t3; \
\
*(uint32_t*)&block[0] = (uint32_t)t0; \
*(uint32_t*)&block[4] = (uint32_t)t1; \
*(uint32_t*)&block[8] = (uint32_t)t2; \
*(uint32_t*)&block[12] = (uint32_t)t3; \
}
#ifndef ___SKINNY_LOOP
/*
 * Encrypt the 16-byte `block` in place, 64-bit word variant, fully unrolled
 * (selected when ___SKINNY_LOOP is not defined).
 *
 *   block     : 16 bytes, overwritten with the result.
 *   roundKeys : work buffer. Bytes 0..7 are expanded to bytes 0..63 by
 *               PERMUTATION_TK1(); the precomputed per-round values are read
 *               sequentially from byte 64 on.
 *   sbox      : substitution table used by the SBOX_n macros.
 *   sbox2     : substitution table used by SBOX_16 for its first byte.
 *
 * Each SKINNY_MAIN() performs two rounds and consumes one 8-byte tk1 entry,
 * so tk1 is rewound to roundKeys[0] after every eight expansions (16 rounds).
 */
void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2)
{
uint64_t *tk1;
uint64_t *tk2;
uint64_t t0; // used in MACRO
uint64_t t1; // used in MACRO
uint64_t t2; // used in MACRO
uint64_t t3; // used in MACRO
// TK1
PERMUTATION_TK1();
// SB+AC+ShR+MC
tk2 = (uint64_t*)&roundKeys[64];
tk1 = (uint64_t*)&roundKeys[0];
// 1st, ... ,16th round
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
tk1 = (uint64_t*)&roundKeys[0];
// 17th, ... ,32nd round
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
tk1 = (uint64_t*)&roundKeys[0];
// 33rd, ... ,40th round
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
#ifdef ___NUM_OF_ROUNDS_56
// 41st, ... ,48th round (tk1 continues from the previous group)
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
tk1 = (uint64_t*)&roundKeys[0];
// 49th, ... ,56th round
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
#endif
}
#else /* ___SKINNY_LOOP */
/*
 * Encrypt the 16-byte `block` in place, 64-bit word / looped variant.
 *
 *   block     : 16 bytes, overwritten with the result.
 *   roundKeys : work buffer. Bytes 0..7 are expanded to bytes 0..63 by
 *               PERMUTATION_TK1(); the precomputed per-round values are read
 *               sequentially from byte 64 on.
 *   sbox      : substitution table used by the SBOX_n macros.
 *   sbox2     : substitution table used by SBOX_16 for its first byte.
 *
 * Every SKINNY_MAIN() expansion runs two rounds and consumes one 8-byte tk1
 * entry; the 64-byte tk1 area therefore wraps after eight expansions.
 * block, roundKeys, sbox, sbox2, tk1, tk2 and t0..t3 keep their names because
 * the macros refer to them directly.
 */
void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2)
{
#ifdef ___NUM_OF_ROUNDS_56
    enum { double_rounds = 28 };    /* 56 rounds */
#else
    enum { double_rounds = 20 };    /* 40 rounds */
#endif
    uint64_t *tk1 = (uint64_t*)&roundKeys[0];
    uint64_t *tk2 = (uint64_t*)&roundKeys[64];
    uint64_t t0;    /* used in MACRO */
    uint64_t t1;    /* used in MACRO */
    uint64_t t2;    /* used in MACRO */
    uint64_t t3;    /* used in MACRO */
    int n;

    /* TK1 */
    PERMUTATION_TK1();

    /* SB+AC+ShR+MC */
    for (n = 0; n < double_rounds; ++n) {
        if ((n % 8) == 0) {
            /* start of a 16-round group: rewind the TK1 cursor */
            tk1 = (uint64_t*)&roundKeys[0];
        }
        SKINNY_MAIN();
    }
}
#endif /* ___SKINNY_LOOP */
#else /* ___ENABLE_DWORD_CAST */
/*
 * Two consecutive cipher rounds (one "odd", one "even") on the 16-byte
 * `block`, 32-bit word variant.
 *
 * Expects in the calling scope: block, sbox, sbox2, the scratch variables
 * t0..t4 (32-bit) and the round-key cursors tk1 and tk2 (uint32_t*).
 *
 * Per round:
 *   - the four SBOX_n macros substitute all 16 bytes, each group of four
 *     being stored with its own rotation; SBOX_16 uses sbox2 for one byte;
 *   - rows 0 and 1 (bytes 0..7) are XORed with round-key material: tk1 and
 *     tk2 words in the odd round, only tk2 words in the even round. So each
 *     expansion consumes two tk1 words and four tk2 words;
 *   - the four 32-bit rows r0..r3 (after key addition) are recombined as
 *       new r0 = r0 ^ r2 ^ r3,  new r1 = r0,
 *       new r2 = r1 ^ r2,       new r3 = r0 ^ r2
 *     and stored back.
 */
#define SKINNY_MAIN() \
{ \
\
/* odd */ \
\
/* LUT(with ShiftRows & AC(c2))*/ \
\
SBOX_0( block[0], block[1], block[2], block[3]); \
SBOX_8( block[4], block[5], block[6], block[7]); \
SBOX_16(block[8], block[9], block[10], block[11]); \
SBOX_24(block[12], block[13], block[14], block[15]); \
\
/* TK1^TK2^TK3^AC(c0 c1) */ \
\
t1 = *(uint32_t*)&block[0]; \
t0 = *(uint32_t*)&block[4]; \
t1 ^= *tk1++; \
t1 ^= *tk2++; \
t0 ^= *tk1++; \
t0 ^= *tk2++; \
\
/* MC */ \
\
t2 = *(uint32_t*)&block[8]; \
t4 = *(uint32_t*)&block[12]; \
\
/* 0^2 */ \
t3 = t1 ^ t2; \
\
/* 1^2 */ \
t2 = t0 ^ t2; \
\
/* 0^2^3 */ \
t0 = t3 ^ t4; \
\
*(uint32_t*)&block[0] = t0; \
*(uint32_t*)&block[4] = t1; \
*(uint32_t*)&block[8] = t2; \
*(uint32_t*)&block[12] = t3; \
\
/* even */ \
\
/* LUT(with ShiftRows & AC(c2))*/ \
\
SBOX_0( block[0], block[1], block[2], block[3]); \
SBOX_8( block[4], block[5], block[6], block[7]); \
SBOX_16(block[8], block[9], block[10], block[11]); \
SBOX_24(block[12], block[13], block[14], block[15]); \
\
/* TK2^TK3^AC(c0 c1) */ \
\
t1 = *(uint32_t*)&block[0]; \
t0 = *(uint32_t*)&block[4]; \
t1 ^= *tk2++; \
t0 ^= *tk2++; \
\
/* MC */ \
\
t2 = *(uint32_t*)&block[8]; \
t4 = *(uint32_t*)&block[12]; \
\
/* 0^2 */ \
t3 = t1 ^ t2; \
\
/* 1^2 */ \
t2 = t0 ^ t2; \
\
/* 0^2^3 */ \
t0 = t3 ^ t4; \
\
*(uint32_t*)&block[0] = t0; \
*(uint32_t*)&block[4] = t1; \
*(uint32_t*)&block[8] = t2; \
*(uint32_t*)&block[12] = t3; \
}
#ifndef ___SKINNY_LOOP
/*
 * Encrypt the 16-byte `block` in place, 32-bit word variant, fully unrolled
 * (selected when ___SKINNY_LOOP is not defined).
 *
 *   block     : 16 bytes, overwritten with the result.
 *   roundKeys : work buffer. Bytes 0..7 are expanded to bytes 0..63 by
 *               PERMUTATION_TK1(); the precomputed per-round values are read
 *               sequentially from byte 64 on.
 *   sbox      : substitution table used by the SBOX_n macros.
 *   sbox2     : substitution table used by SBOX_16 for its first byte.
 *
 * Each SKINNY_MAIN() performs two rounds and consumes 8 bytes of tk1, so
 * tk1 is rewound to roundKeys[0] after every eight expansions (16 rounds).
 */
void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2)
{
uint32_t *tk1;
uint32_t *tk2;
uint32_t t0; // used in MACRO
uint32_t t1; // used in MACRO
uint32_t t2; // used in MACRO
uint32_t t3; // used in MACRO
uint32_t t4; // used in MACRO
// TK1
PERMUTATION_TK1();
// SB+AC+ShR+MC
tk2 = (uint32_t*)&roundKeys[64];
tk1 = (uint32_t*)&roundKeys[0];
// 1st, ... ,16th round
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
tk1 = (uint32_t*)&roundKeys[0];
// 17th, ... ,32nd round
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
tk1 = (uint32_t*)&roundKeys[0];
// 33rd, ... ,40th round
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
#ifdef ___NUM_OF_ROUNDS_56
// 41st, ... ,48th round (tk1 continues from the previous group)
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
tk1 = (uint32_t*)&roundKeys[0];
// 49th, ... ,56th round
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
#endif
}
#else /* ___SKINNY_LOOP */
/*
 * Encrypt the 16-byte `block` in place, 32-bit word / looped variant.
 *
 *   block     : 16 bytes, overwritten with the result.
 *   roundKeys : work buffer. Bytes 0..7 are expanded to bytes 0..63 by
 *               PERMUTATION_TK1(); the precomputed per-round values are read
 *               sequentially from byte 64 on.
 *   sbox      : substitution table used by the SBOX_n macros.
 *   sbox2     : substitution table used by SBOX_16 for its first byte.
 *
 * Every SKINNY_MAIN() expansion runs two rounds and consumes 8 bytes of tk1;
 * the 64-byte tk1 area therefore wraps after eight expansions.
 * block, roundKeys, sbox, sbox2, tk1, tk2 and t0..t4 keep their names because
 * the macros refer to them directly.
 */
void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2)
{
#ifdef ___NUM_OF_ROUNDS_56
    enum { double_rounds = 28 };    /* 56 rounds */
#else
    enum { double_rounds = 20 };    /* 40 rounds */
#endif
    uint32_t *tk1 = (uint32_t*)&roundKeys[0];
    uint32_t *tk2 = (uint32_t*)&roundKeys[64];
    uint32_t t0;    /* used in MACRO */
    uint32_t t1;    /* used in MACRO */
    uint32_t t2;    /* used in MACRO */
    uint32_t t3;    /* used in MACRO */
    uint32_t t4;    /* used in MACRO */
    int n;

    /* TK1 */
    PERMUTATION_TK1();

    /* SB+AC+ShR+MC */
    for (n = 0; n < double_rounds; ++n) {
        if ((n % 8) == 0) {
            /* start of a 16-round group: rewind the TK1 cursor */
            tk1 = (uint32_t*)&roundKeys[0];
        }
        SKINNY_MAIN();
    }
}
#endif /* ___SKINNY_LOOP */
#endif /* ___ENABLE_DWORD_CAST */
#define CRYPTO_KEYBYTES 16
#define CRYPTO_NSECBYTES 0
#define CRYPTO_NPUBBYTES 16
#define CRYPTO_ABYTES 16
#define CRYPTO_NOOVERLAP 1
/*
* Date: 29 November 2018
* Contact: Thomas Peyrin - thomas.peyrin@gmail.com
* Mustafa Khairallah - mustafam001@e.ntu.edu.sg
*/
#include "crypto_aead.h"
#include "api.h"
#include "skinny.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Build a 16-byte block from the first len8 bytes of m.
 *
 *   m    : source bytes; only m[0 .. min(len8,16)-1] are read.
 *   mp   : destination, exactly 16 bytes are written.
 *   len8 : number of valid bytes in m.
 *
 * Result:
 *   len8 <= 0        -> mp is all zero.
 *   0 < len8 < 16    -> mp = m[0..len8-1], zero fill, and mp[15] = len8.
 *   len8 >= 16       -> mp = m[0..15] (no length byte).
 *
 * Implemented byte by byte. The earlier word-based version loaded whole
 * 32/64-bit words from m even when fewer valid bytes remained (reading past
 * the end of a short input), required both buffers to be word aligned, and
 * its mask only selected the leading bytes on little-endian hosts.
 */
void pad (const unsigned char* m, unsigned char* mp, int len8) {
    int copy = len8;
    int i;

    if (copy > 16) {
        copy = 16;
    }
    if (copy < 0) {
        copy = 0;
    }

    for (i = 0; i < copy; i++) {
        mp[i] = m[i];
    }
    for (; i < 16; i++) {
        mp[i] = 0;
    }

    /* partial blocks carry their length in the last byte */
    if (0 < len8 && len8 < 16) {
        mp[15] = (unsigned char)len8;
    }
}
/*
 * Apply the byte-wise map  b -> (b >> 1) with new bit 7 = b7 ^ b0  to the
 * 16 bytes at s and write the result to c.
 *
 *   s : 16 input bytes, accessed as 32/64-bit words.
 *   c : 16 output bytes, accessed as 32/64-bit words.
 *
 * Both pointers are dereferenced as words, so they have to be suitably
 * aligned; use g8A_for_Tag_Generation() when c may not be.
 * The whole input is read before anything is written, so s == c is fine.
 */
void g8A (unsigned char* s, unsigned char* c) {
#ifdef ___ENABLE_DWORD_CAST
    typedef uint64_t lane_t;
    enum { lanes = 2 };
    const lane_t low7 = 0x7f7f7f7f7f7f7f7f;
    const lane_t top1 = 0x8080808080808080;
#else
    typedef uint32_t lane_t;
    enum { lanes = 4 };
    const lane_t low7 = 0x7f7f7f7f;
    const lane_t top1 = 0x80808080;
#endif
    const lane_t *src = (const lane_t*)s;
    lane_t *dst = (lane_t*)c;
    lane_t in[lanes];
    int k;

    for (k = 0; k < lanes; k++) {
        in[k] = src[k];
    }
    for (k = 0; k < lanes; k++) {
        /* low 7 bits: shifted input; top bit: bit7 ^ bit0 of the same byte */
        dst[k] = ((in[k] >> 1) & low7) ^ ((in[k] ^ (in[k] << 7)) & top1);
    }
}
/*
 * Same byte-wise map as g8A() -- b -> (b >> 1) with new bit 7 = b7 ^ b0 --
 * but the result is written one byte at a time.
 *
 *   s : 16 input bytes, read as 32/64-bit words (must be word aligned).
 *   c : 16 output bytes; may have any alignment.
 *
 * Each result word is emitted least-significant byte first. The whole input
 * is read before anything is written.
 */
void g8A_for_Tag_Generation (unsigned char* s, unsigned char* c) {
#ifdef ___ENABLE_DWORD_CAST
    typedef uint64_t lane_t;
    enum { lanes = 2, lane_bytes = 8 };
    const lane_t low7 = 0x7f7f7f7f7f7f7f7f;
    const lane_t top1 = 0x8080808080808080;
#else
    typedef uint32_t lane_t;
    enum { lanes = 4, lane_bytes = 4 };
    const lane_t low7 = 0x7f7f7f7f;
    const lane_t top1 = 0x80808080;
#endif
    const lane_t *src = (const lane_t*)s;
    lane_t out[lanes];
    int k;
    int j;

    for (k = 0; k < lanes; k++) {
        lane_t v = src[k];
        out[k] = ((v >> 1) & low7) ^ ((v ^ (v << 7)) & top1);
    }

    // use byte access because of memory alignment.
    // c is not always in word(4 byte) alignment.
    for (k = 0; k < lanes; k++) {
        for (j = 0; j < lane_bytes; j++) {
            c[k * lane_bytes + j] = (unsigned char)(out[k] >> (8 * j));
        }
    }
}
/*
 * XOR a full 16-byte block m into the 16-byte state s (s ^= m).
 *
 *   m : 16 input bytes, only read.
 *   s : 16-byte state, updated in place.
 *
 * Both buffers are accessed as 32/64-bit words and must be aligned
 * accordingly.
 */
void rho_ad_eqov16 (
    const unsigned char* m,
    unsigned char* s) {
#ifdef ___ENABLE_DWORD_CAST
    typedef uint64_t lane_t;
    enum { lanes = 2 };
#else
    typedef uint32_t lane_t;
    enum { lanes = 4 };
#endif
    const lane_t *block = (const lane_t*)m;
    lane_t *state = (lane_t*)s;
    int k;

    for (k = 0; k < lanes; k++) {
        state[k] ^= block[k];
    }
}
/*
 * Expand the len8-byte block at m to 16 bytes with pad() and XOR the
 * padded block into the state s.
 */
void rho_ad_ud16 (
    const unsigned char* m,
    unsigned char* s,
    int len8) {
#ifdef ___ENABLE_DWORD_CAST
    typedef uint64_t lane_t;
#else
    typedef uint32_t lane_t;
#endif
    enum { LANES = 16 / sizeof(lane_t) };
    unsigned char mp [16];
    int j;

    pad(m,mp,len8);
    for (j = 0; j < LANES; j++) {
        ((lane_t*)s)[j] ^= ((lane_t*)mp)[j];
    }
}
/*
 * Process one full 16-byte message block.
 *
 * g8A(s, c) first fills c from the state; then, word by word,
 *     s ^= m   and   c ^= m.
 * All loads happen after the g8A call and before any store, exactly as in
 * the straight-line version.
 */
void rho_eqov16 (
    const unsigned char* m,
    unsigned char* c,
    unsigned char* s) {
#ifdef ___ENABLE_DWORD_CAST
    typedef uint64_t lane_t;
#else
    typedef uint32_t lane_t;
#endif
    enum { LANES = 16 / sizeof(lane_t) };
    lane_t out[LANES];
    lane_t st[LANES];
    lane_t in[LANES];
    int j;

    g8A(s,c);

    for (j = 0; j < LANES; j++) {
        out[j] = ((lane_t*)c)[j];
    }
    for (j = 0; j < LANES; j++) {
        st[j] = ((lane_t*)s)[j];
    }
    for (j = 0; j < LANES; j++) {
        in[j] = ((const lane_t*)m)[j];
    }

    for (j = 0; j < LANES; j++) {
        ((lane_t*)s)[j] = st[j] ^ in[j];
    }
    for (j = 0; j < LANES; j++) {
        ((lane_t*)c)[j] = out[j] ^ in[j];
    }
}
/*
 * Process a final message block of len8 bytes.
 *
 * mp = pad(m, len8); g8A(s, c) fills c from the state; then
 *     s ^= mp                         (all 16 bytes)
 *     c  = (c ^ mp) restricted to the low-order len8 bytes, zero elsewhere.
 * All 16 bytes of c are stored.  Per word, "keep" is the mask of the
 * low-order bytes that are still inside the len8-byte block.
 */
void rho_ud16 (
    const unsigned char* m,
    unsigned char* c,
    unsigned char* s,
    int len8) {
#ifdef ___ENABLE_DWORD_CAST
    typedef uint64_t lane_t;
#else
    typedef uint32_t lane_t;
#endif
    enum { LANE_BYTES = sizeof(lane_t), LANES = 16 / sizeof(lane_t) };
    unsigned char mp [16];
    lane_t padded[LANES];
    lane_t out[LANES];
    int j;

    pad(m,mp,len8);
    g8A(s,c);

    for (j = 0; j < LANES; j++) {
        padded[j] = ((lane_t*)mp)[j];
    }
    for (j = 0; j < LANES; j++) {
        out[j] = ((lane_t*)c)[j];
    }
    for (j = 0; j < LANES; j++) {
        ((lane_t*)s)[j] ^= padded[j];
    }

    for (j = 0; j < LANES; j++) {
        const int live = len8 - j * LANE_BYTES; /* block bytes left in this word */
        lane_t keep;
        if (live <= 0) {
            keep = 0;
        } else if (live >= LANE_BYTES) {
            keep = (lane_t)~(lane_t)0;
        } else {
            keep = (lane_t)~(lane_t)0 >> (8 * (LANE_BYTES - live));
        }
        out[j] = (out[j] ^ padded[j]) & keep;
    }

    for (j = 0; j < LANES; j++) {
        ((lane_t*)c)[j] = out[j];
    }
}
/*
 * Inverse of rho_eqov16 for one full 16-byte ciphertext block.
 *
 * g8A(s, m) first fills m from the state (call that value g); then, word
 * by word,
 *     s ^= c ^ g   and   m = g ^ c.
 * All loads happen after the g8A call and before any store.
 */
void irho_eqov16 (
    unsigned char* m,
    const unsigned char* c,
    unsigned char* s) {
#ifdef ___ENABLE_DWORD_CAST
    typedef uint64_t lane_t;
#else
    typedef uint32_t lane_t;
#endif
    enum { LANES = 16 / sizeof(lane_t) };
    lane_t in[LANES];
    lane_t st[LANES];
    lane_t g[LANES];
    int j;

    g8A(s,m);

    for (j = 0; j < LANES; j++) {
        in[j] = ((const lane_t*)c)[j];
    }
    for (j = 0; j < LANES; j++) {
        st[j] = ((lane_t*)s)[j];
    }
    for (j = 0; j < LANES; j++) {
        g[j] = ((lane_t*)m)[j];
    }

    for (j = 0; j < LANES; j++) {
        ((lane_t*)s)[j] = st[j] ^ (in[j] ^ g[j]);
    }
    for (j = 0; j < LANES; j++) {
        ((lane_t*)m)[j] = g[j] ^ in[j];
    }
}
/*
 * Inverse of rho_ud16: recover a final plaintext block of len8 bytes.
 *
 *   m    - output, receives exactly min(len8, 16) plaintext bytes
 *   c    - input, len8 ciphertext bytes
 *   s    - 16-byte state, updated in place
 *   len8 - number of bytes in this block (values >= 16 mean a full block)
 *
 * With cp = pad(c, len8) and g = g8A(s):
 *     s[i] ^= cp[i]                  for every i in 0..15
 *     s[i] ^= g[i],  m[i] = g[i] ^ cp[i]   additionally for i < len8
 *
 * The previous version let g8A() write straight into m and then stored a
 * full 16-byte block back, zero-filling m[len8..15].  Only len8 bytes of
 * plaintext exist here, so that wrote up to 15 bytes past a buffer sized to
 * the plaintext length.  The g8A output now goes to a local scratch block
 * and only the live bytes are copied out.
 *
 * Bytes are handled in memory order, which matches the former word-mask
 * code on little-endian targets.
 */
void irho_ud16 (
    unsigned char* m,
    const unsigned char* c,
    unsigned char* s,
    int len8) {
    unsigned char cp [16];
    /* union guarantees the word alignment g8A's word stores rely on */
    union {
        uint64_t align;
        unsigned char b[16];
    } g;
    int i;

    pad(c,cp,len8);
    g8A(s,g.b);

    for (i = 0; i < 16; i++) {
        const unsigned char ci = cp[i];
        if (i < len8) {
            const unsigned char gi = g.b[i];
            s[i] ^= (unsigned char)(ci ^ gi);
            m[i] = (unsigned char)(gi ^ ci);
        } else {
            s[i] ^= ci;
        }
    }
}
/*
 * Reset the 8-byte counter block at CNT to the word value 1
 * (low word 1, everything above it 0).
 */
void reset_lfsr_gf56 (unsigned char* CNT) {
#ifdef ___ENABLE_DWORD_CAST
    uint64_t* cnt = (uint64_t*)CNT;
    cnt[0] = 0x0000000000000001; /* CNT7 .. CNT0 */
#else
    uint32_t* cnt = (uint32_t*)CNT;
    cnt[0] = 0x00000001; /* CNT3 CNT2 CNT1 CNT0 */
    cnt[1] = 0x00000000; /* CNT7 CNT6 CNT5 CNT4 */
#endif
}
/*
 * Advance the counter block at CNT by one step: shift the 64-bit value
 * left by one bit and, if bit 7 of CNT[6] was set before the shift, XOR
 * 0x95 into the low byte.
 */
void lfsr_gf56 (unsigned char* CNT) {
#ifdef ___ENABLE_DWORD_CAST
    uint64_t* cnt = (uint64_t*)CNT;
    const uint64_t feedback = (CNT[6] & 0x80) ? 0x95 : 0;

    cnt[0] = (cnt[0] << 1) ^ feedback;
#else
    uint32_t* cnt = (uint32_t*)CNT;
    const uint32_t feedback = (CNT[6] & 0x80) ? 0x95 : 0;
    const uint32_t lo = cnt[0]; /* CNT3 CNT2 CNT1 CNT0 */
    const uint32_t hi = cnt[1]; /* CNT7 CNT6 CNT5 CNT4 */

    cnt[0] = (lo << 1) ^ feedback;
    cnt[1] = (hi << 1) | (lo >> 31); /* carry the top bit of the low word */
#endif
}
/*
 * Store the domain byte D in CNT[7], then run the encryption routine
 * currently selected in p_skinny_ctrl on the block s with CNT, T and k.
 */
void block_cipher(
    unsigned char* s,
    const unsigned char* k, unsigned char* T,
    unsigned char* CNT, unsigned char D,
    skinny_ctrl* p_skinny_ctrl) {
    skinny_ctrl* const ctx = p_skinny_ctrl;

    CNT[7] = D;
    (*ctx->func_skinny_128_384_enc)(s, ctx, CNT, T, k);
}
/*
 * Encrypt the state s with the nonce N in the T argument position:
 * a thin wrapper around block_cipher() that drops N's const qualifier.
 */
void nonce_encryption (
    const unsigned char* N,
    unsigned char* CNT,
    unsigned char*s, const unsigned char* k,
    unsigned char D,
    skinny_ctrl* p_skinny_ctrl) {
    unsigned char* const tweak = (unsigned char*)N;

    block_cipher(s, k, tweak, CNT, D, p_skinny_ctrl);
}
/*
 * Write the 16-byte tag derived from s at the current output position *c,
 * then move *c to (position + 16 - *clen).
 */
void generate_tag (
    unsigned char** c, unsigned char* s,
    unsigned long long* clen) {
    unsigned char* const tag = *c;

    g8A_for_Tag_Generation(s, tag);
    *c = (tag + 16) - *clen;
}
/*
 * Encrypt one full 16-byte message block: apply rho_eqov16, advance both
 * cursors by 16, step the counter and re-encrypt the state under the nonce
 * with domain byte D.  Returns the remaining length, mlen - 16.
 */
unsigned long long msg_encryption_eqov16 (
    const unsigned char** M, unsigned char** c,
    const unsigned char* N,
    unsigned char* CNT,
    unsigned char*s, const unsigned char* k,
    unsigned char D,
    unsigned long long mlen,
    skinny_ctrl* p_skinny_ctrl) {
    const unsigned char* const src = *M;
    unsigned char* const dst = *c;

    rho_eqov16(src, dst, s);
    *c = dst + 16;
    *M = src + 16;

    lfsr_gf56(CNT);
    nonce_encryption(N, CNT, s, k, D, p_skinny_ctrl);

    return mlen - 16;
}
/*
 * Encrypt the final, partial message block of mlen bytes: apply rho_ud16,
 * advance both cursors by mlen, step the counter and re-encrypt the state
 * under the nonce with domain byte D.  Always returns 0 (nothing left).
 */
unsigned long long msg_encryption_ud16 (
    const unsigned char** M, unsigned char** c,
    const unsigned char* N,
    unsigned char* CNT,
    unsigned char*s, const unsigned char* k,
    unsigned char D,
    unsigned long long mlen,
    skinny_ctrl* p_skinny_ctrl) {
    const unsigned char* const src = *M;
    unsigned char* const dst = *c;

    rho_ud16(src, dst, s, mlen);
    *c = dst + mlen;
    *M = src + mlen;

    lfsr_gf56(CNT);
    nonce_encryption(N, CNT, s, k, D, p_skinny_ctrl);

    return 0;
}
/*
 * Decrypt one full 16-byte ciphertext block: apply irho_eqov16, advance
 * both cursors by 16, step the counter and re-encrypt the state under the
 * nonce with domain byte D.  Returns the remaining length, clen - 16.
 */
unsigned long long msg_decryption_eqov16 (
    unsigned char** M, const unsigned char** c,
    const unsigned char* N,
    unsigned char* CNT,
    unsigned char*s, const unsigned char* k,
    unsigned char D,
    unsigned long long clen,
    skinny_ctrl* p_skinny_ctrl) {
    unsigned char* const dst = *M;
    const unsigned char* const src = *c;

    irho_eqov16(dst, src, s);
    *c = src + 16;
    *M = dst + 16;

    lfsr_gf56(CNT);
    nonce_encryption(N, CNT, s, k, D, p_skinny_ctrl);

    return clen - 16;
}
/*
 * Decrypt the final, partial ciphertext block of clen bytes: apply
 * irho_ud16, advance both cursors by clen, step the counter and re-encrypt
 * the state under the nonce with domain byte D.  Always returns 0.
 */
unsigned long long msg_decryption_ud16 (
    unsigned char** M, const unsigned char** c,
    const unsigned char* N,
    unsigned char* CNT,
    unsigned char*s, const unsigned char* k,
    unsigned char D,
    unsigned long long clen,
    skinny_ctrl* p_skinny_ctrl) {
    unsigned char* const dst = *M;
    const unsigned char* const src = *c;

    irho_ud16(dst, src, s, clen);
    *c = src + clen;
    *M = dst + clen;

    lfsr_gf56(CNT);
    nonce_encryption(N, CNT, s, k, D, p_skinny_ctrl);

    return 0;
}
/*
 * Absorb 32 bytes of associated data: the first 16 bytes are XORed into
 * the state, the next 16 bytes are copied into a local block T that is
 * passed to block_cipher() in the T position.  The counter is stepped once
 * after each half.  *A advances by 32; returns adlen - 32.
 */
unsigned long long ad_encryption_eqov32 (
    const unsigned char** A, unsigned char* s,
    const unsigned char* k, unsigned long long adlen,
    unsigned char* CNT,
    unsigned char D,
    skinny_ctrl* p_skinny_ctrl) {
#ifdef ___ENABLE_DWORD_CAST
    typedef uint64_t lane_t;
#else
    typedef uint32_t lane_t;
#endif
    enum { LANES = 16 / sizeof(lane_t) };
    unsigned char T [16];
    const unsigned char* cur = *A;
    int j;

    rho_ad_eqov16(cur, s);
    cur += 16;
    lfsr_gf56(CNT);

    for (j = 0; j < LANES; j++) {
        ((lane_t*)T)[j] = ((lane_t*)cur)[j];
    }
    *A = cur + 16;

    block_cipher(s, k, T, CNT, D, p_skinny_ctrl);
    lfsr_gf56(CNT);

    return adlen - 32;
}
/*
 * Absorb the last 17..31 bytes of associated data: the first 16 bytes are
 * XORed into the state, the remaining (adlen - 16) bytes are padded into a
 * local block T that is passed to block_cipher() in the T position.  The
 * counter is stepped once after each half.  Always returns 0.
 */
unsigned long long ad_encryption_ov16 (
    const unsigned char** A, unsigned char* s,
    const unsigned char* k, unsigned long long adlen,
    unsigned char* CNT,
    unsigned char D,
    skinny_ctrl* p_skinny_ctrl) {
    unsigned char T [16];
    const unsigned char* cur = *A;
    const unsigned long long tail = adlen - 16;

    rho_ad_eqov16(cur, s);
    cur += 16;
    lfsr_gf56(CNT);

    pad(cur, T, tail);
    *A = cur + tail;

    block_cipher(s, k, T, CNT, D, p_skinny_ctrl);
    lfsr_gf56(CNT);

    return 0;
}
/*
 * Absorb a final, exactly 16-byte block of associated data: XOR it into
 * the state, advance *A by 16 and step the counter.  Always returns 0.
 */
unsigned long long ad_encryption_eq16 (
    const unsigned char** A, unsigned char* s,
    unsigned char* CNT) {
    const unsigned char* const cur = *A;

    rho_ad_eqov16(cur, s);
    *A = cur + 16;
    lfsr_gf56(CNT);

    return 0;
}
/*
 * Absorb a final block of associated data shorter than 16 bytes: pad and
 * XOR it into the state, advance *A by adlen and step the counter.
 * Always returns 0.
 */
unsigned long long ad_encryption_ud16(
    const unsigned char** A, unsigned char* s,
    unsigned long long adlen,
    unsigned char* CNT) {
    const unsigned char* const cur = *A;

    rho_ad_ud16(cur, s, adlen);
    *A = cur + adlen;
    lfsr_gf56(CNT);

    return 0;
}
/*
 * AEAD encryption.
 *
 *   c, clen  - output buffer and length; *clen is set to mlen + 16
 *   m, mlen  - message
 *   ad,adlen - associated data
 *   nsec     - unused
 *   npub     - nonce, passed to the block cipher in the T position
 *   k        - key
 *
 * Phase 1 absorbs the associated data (32 bytes per iteration, with
 * dedicated handling for the final 1..32 bytes), phase 2 encrypts the
 * message 16 bytes at a time, and finally the 16-byte tag is written right
 * after the ciphertext.  Always returns 0.
 */
int crypto_aead_encrypt (
    unsigned char* c, unsigned long long* clen,
    const unsigned char* m, unsigned long long mlen,
    const unsigned char* ad, unsigned long long adlen,
    const unsigned char* nsec,
    const unsigned char* npub,
    const unsigned char* k) {
    unsigned char s[16];
    unsigned char CNT[8];
    const unsigned char* A = ad;
    const unsigned char* M = m;
    const unsigned char* N = npub;
    skinny_ctrl ctrl;
    int i;

    (void) nsec;

    ctrl.func_skinny_128_384_enc = skinny_128_384_enc123_12;
    for (i = 0; i < 16; i++) {
        s[i] = 0;
    }

    /* ---- associated data ---- */
    reset_lfsr_gf56(CNT);
    if (adlen == 0) {
        /* empty AD */
        lfsr_gf56(CNT);
        nonce_encryption(N,CNT,s,k,0x1a,&ctrl);
    } else {
        do {
            if (adlen > 32) {
                /* a normal full pair of blocks */
                adlen = ad_encryption_eqov32(&A,s,k,adlen,CNT,0x08,&ctrl);
            } else if (adlen == 32) {
                /* last block is even and complete */
                adlen = ad_encryption_eqov32(&A,s,k,adlen,CNT,0x08,&ctrl);
                nonce_encryption(N,CNT,s,k,0x18,&ctrl);
            } else if (adlen > 16) {
                /* last block is even and incomplete */
                adlen = ad_encryption_ov16(&A,s,k,adlen,CNT,0x08,&ctrl);
                nonce_encryption(N,CNT,s,k,0x1a,&ctrl);
            } else if (adlen == 16) {
                /* last block is odd and complete */
                adlen = ad_encryption_eq16(&A,s,CNT);
                nonce_encryption(N,CNT,s,k,0x18,&ctrl);
            } else {
                /* last block is odd and incomplete */
                adlen = ad_encryption_ud16(&A,s,adlen,CNT);
                nonce_encryption(N,CNT,s,k,0x1a,&ctrl);
            }
        } while (adlen > 0);
    }

    /* ---- message ---- */
    ctrl.func_skinny_128_384_enc = skinny_128_384_enc1_1;
    reset_lfsr_gf56(CNT);
    *clen = mlen + 16;
    if (mlen == 0) {
        /* empty message */
        lfsr_gf56(CNT);
        nonce_encryption(N,CNT,s,k,0x15,&ctrl);
    } else {
        do {
            if (mlen > 16) {
                /* a normal full block */
                mlen = msg_encryption_eqov16(&M,&c,N,CNT,s,k,0x04,mlen,&ctrl);
            } else if (mlen == 16) {
                /* last block, complete */
                mlen = msg_encryption_eqov16(&M,&c,N,CNT,s,k,0x14,mlen,&ctrl);
            } else {
                /* last block, incomplete */
                mlen = msg_encryption_ud16(&M,&c,N,CNT,s,k,0x15,mlen,&ctrl);
            }
        } while (mlen > 0);
    }

    /* ---- tag ---- */
    generate_tag(&c,s,clen);
    return 0;
}
/*
 * AEAD decryption and tag verification.
 *
 *   m, mlen  - output buffer and length; *mlen is set to clen - 16
 *   nsec     - unused
 *   c, clen  - ciphertext followed by the 16-byte tag
 *   ad,adlen - associated data
 *   npub     - nonce, passed to the block cipher in the T position
 *   k        - key
 *
 * Returns 0 when the recomputed tag equals the received one, -1 otherwise.
 * An input shorter than the 16-byte tag is rejected up front (*mlen = 0);
 * previously "clen - 16" wrapped around for such inputs and the block loop
 * ran far past the end of both buffers.
 *
 * The tag is compared without an early exit so that the running time does
 * not reveal the position of the first mismatching byte.
 */
int crypto_aead_decrypt(
    unsigned char *m,unsigned long long *mlen,
    unsigned char *nsec,
    const unsigned char *c,unsigned long long clen,
    const unsigned char *ad,unsigned long long adlen,
    const unsigned char *npub,
    const unsigned char *k) {
    unsigned char s[16];
    unsigned char T[16];
    unsigned char CNT[8];
    const unsigned char* A = ad;
    unsigned char* M = m;
    const unsigned char* N = npub;
    unsigned char diff = 0;
    skinny_ctrl ctrl;
    int i;

    (void) nsec;

    /* A valid ciphertext always carries the 16-byte tag. */
    if (clen < 16) {
        *mlen = 0;
        return -1;
    }

    ctrl.func_skinny_128_384_enc = skinny_128_384_enc123_12;
    for (i = 0; i < 16; i++) {
        s[i] = 0;
    }

    /* ---- associated data ---- */
    reset_lfsr_gf56(CNT);
    if (adlen == 0) { // AD is an empty string
        lfsr_gf56(CNT);
        nonce_encryption(N,CNT,s,k,0x1a,&ctrl);
    } else {
        while (adlen > 0) {
            if (adlen < 16) { // The last block of AD is odd and incomplete
                adlen = ad_encryption_ud16(&A,s,adlen,CNT);
                nonce_encryption(N,CNT,s,k,0x1a,&ctrl);
            }
            else if (adlen == 16) { // The last block of AD is odd and complete
                adlen = ad_encryption_eq16(&A,s,CNT);
                nonce_encryption(N,CNT,s,k,0x18,&ctrl);
            }
            else if (adlen < 32) { // The last block of AD is even and incomplete
                adlen = ad_encryption_ov16(&A,s,k,adlen,CNT,0x08,&ctrl);
                nonce_encryption(N,CNT,s,k,0x1a,&ctrl);
            }
            else if (adlen == 32) { // The last block of AD is even and complete
                adlen = ad_encryption_eqov32(&A,s,k,adlen,CNT,0x08,&ctrl);
                nonce_encryption(N,CNT,s,k,0x18,&ctrl);
            }
            else { // A normal full pair of blocks of AD
                adlen = ad_encryption_eqov32(&A,s,k,adlen,CNT,0x08,&ctrl);
            }
        }
    }

    /* ---- ciphertext ---- */
    ctrl.func_skinny_128_384_enc = skinny_128_384_enc1_1;
    reset_lfsr_gf56(CNT);
    clen = clen - 16;
    *mlen = clen;
    if (clen == 0) { // C is an empty string
        lfsr_gf56(CNT);
        nonce_encryption(N,CNT,s,k,0x15,&ctrl);
    } else {
        while (clen > 0) {
            if (clen < 16) { // The last block of C is incomplete
                clen = msg_decryption_ud16(&M,&c,N,CNT,s,k,0x15,clen,&ctrl);
            }
            else if (clen == 16) { // The last block of C is complete
                clen = msg_decryption_eqov16(&M,&c,N,CNT,s,k,0x14,clen,&ctrl);
            }
            else { // A normal full message block
                clen = msg_decryption_eqov16(&M,&c,N,CNT,s,k,0x04,clen,&ctrl);
            }
        }
    }

    /* ---- tag: c now points at the received tag ---- */
    g8A_for_Tag_Generation(s, T);
    for (i = 0; i < 16; i++) {
        diff |= (unsigned char)(T[i] ^ c[i]);
    }
    return (diff != 0) ? -1 : 0;
}
// Build-time configuration switches.
// ___SKINNY_LOOP: build the loop-based key-schedule routines instead of the
// fully unrolled ones.
#define ___SKINNY_LOOP
// ___NUM_OF_ROUNDS_56: size the round-key buffer and the key schedules for
// 56 rounds; 40 rounds are used when it is not defined.
#define ___NUM_OF_ROUNDS_56
// On 64-bit RISC-V, use the code paths that access data through 64-bit
// words (___ENABLE_DWORD_CAST) instead of 32-bit words.
#if (defined(__riscv_xlen) && (__riscv_xlen == 64))
#define ___ENABLE_DWORD_CAST
#endif
#include <stdint.h>
// Context passed to the SKINNY-128-384 routines: round-key storage plus the
// encryption routine currently selected by the caller.
typedef struct ___skinny_ctrl {
#ifdef ___NUM_OF_ROUNDS_56
unsigned char roundKeys[960]; // number of rounds : 56
#else
unsigned char roundKeys[704]; // number of rounds : 40
#endif
// Selected encryption routine; called as (block, this context, CNT, T, K).
void (*func_skinny_128_384_enc)(unsigned char*, struct ___skinny_ctrl*, unsigned char* CNT, unsigned char* T, const unsigned char* K);
} skinny_ctrl;
// Encryption routines that can be stored in skinny_ctrl.func_skinny_128_384_enc;
// all three share that member's signature.
// NOTE(review): the numeric suffixes presumably name which tweakey parts each
// variant schedules on the current / following calls - confirm in their definitions.
extern void skinny_128_384_enc123_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K);
extern void skinny_128_384_enc12_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K);
extern void skinny_128_384_enc1_1 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K);
/*
 * pack_word(x0, x1, x2, x3, w): combine four byte values into the 32-bit
 * word w, x0 in the least significant byte and x3 in the most significant.
 *
 * Each byte is widened to uint32_t before shifting: with plain promotion to
 * int, (x3) << 24 overflows for values >= 0x80, which is undefined.
 * The expansion keeps its trailing semicolon for existing call sites.
 */
#define pack_word(x0, x1, x2, x3, w) \
    (w) = ((uint32_t)(x3) << 24) ^ \
          ((uint32_t)(x2) << 16) ^ \
          ((uint32_t)(x1) << 8) ^ \
          (uint32_t)(x0);
/*
 * unpack_word(x0, x1, x2, x3, w): split the 32-bit word w into four byte
 * values, x0 receiving the least significant byte.  Expands to four
 * statements, so it must not be used as the unbraced body of an if/loop.
 */
#define unpack_word(x0, x1, x2, x3, w) \
    (x0) = ((w) & 0xff); \
    (x1) = (((w) >> 8) & 0xff); \
    (x2) = (((w) >> 16) & 0xff); \
    (x3) = ((w) >> 24);
/*
 * PERMUTATION(): in-place permutation of eight bytes.
 * Numbering the input bytes 7..0 (most to least significant), the output
 * bytes 7..0 are the input bytes 5 7 2 3 6 0 4 1, as shown by the index
 * rows in the comments below.
 *
 * With ___ENABLE_DWORD_CAST the eight bytes live in one 64-bit variable dw
 * (scratch: dt0, dt1).  Otherwise they live in two 32-bit variables, w0
 * holding bytes 3..0 and w1 holding bytes 7..4 (scratch: t0, t1, t2).
 * The expanding code has to declare all of these variables.
 */
#ifdef ___ENABLE_DWORD_CAST
#define PERMUTATION() \
/* permutation */ \
\
/* 7 6 5 4 3 2 1 0 */ \
/* 5 7 2 3 6 0 4 1 */ \
\
/* dw (7 6 5 4 3 2 1 0) */ \
\
/* dw (5 7 2 3 6 0 4 1) */ \
\
dt0 = dw >> 24; /* - - - 7 6 5 4 3 */ \
dt0 = dt0 & 0x00000000ff00ff00; /* - - - - 6 - 4 - */ \
\
dt1 = dw << 16; /* 5 4 3 2 1 0 - - */ \
dt1 = dt1 & 0xff00000000ff0000; /* 5 - - - - 0 - - */ \
dt0 = dt0 ^ dt1; /* 5 - - - 6 0 4 - */ \
\
dt1 = dw >> 8; /* - 7 6 5 4 3 2 1 */ \
dt1 = dt1 & 0x00ff0000000000ff; /* - 7 - - - - - 1 */ \
dt0 = dt0 ^ dt1; /* 5 7 - - 6 0 4 1 */ \
\
dt1 = dw << 8; /* 6 5 4 3 2 1 0 - */ \
dt1 = dt1 & 0x000000ff00000000; /* - - - 3 - - - - */ \
dt0 = dt0 ^ dt1; /* 5 7 - 3 6 0 4 1 */ \
\
dt1 = dw << 24; /* 4 3 2 1 0 - - - */ \
dw = dt1 & 0x0000ff0000000000; /* - - 2 - - - - - */ \
dw = dw ^ dt0; /* 5 7 2 3 6 0 4 1 */
#else
#define PERMUTATION() \
/* permutation */ \
\
/* 7 6 5 4 3 2 1 0 */ \
/* 5 7 2 3 6 0 4 1 */ \
\
/* w0 (3 2 1 0) */ \
/* w1 (7 6 5 4) */ \
\
/* w0 (6 0 4 1) */ \
/* w1 (5 7 2 3) */ \
\
t0 = w1 << 8; /* 6 5 4 - */ \
t0 = t0 & 0xff00ff00; /* 6 - 4 - */ \
\
t1 = w1 << 16; /* 5 4 - - */ \
t1 = t1 & 0xff000000; /* 5 - - - */ \
\
t2 = w1 & 0xff000000; /* 7 - - - */ \
t2 = t2 >> 8; /* - 7 - - */ \
t1 = t1 ^ t2; /* 5 7 - - */ \
\
t2 = w0 & 0xff000000; /* 3 - - - */ \
t2 = t2 >> 24; /* - - - 3 */ \
t1 = t1 ^ t2; /* 5 7 - 3 */ \
\
w1 = w0 >> 8; /* - 3 2 1 */ \
w1 = w1 & 0x0000ff00; /* - - 2 - */ \
w1 = w1 ^ t1; /* 5 7 2 3 */ \
\
t2 = w0 & 0x0000ff00; /* - - 1 - */ \
t2 = t2 >> 8; /* - - - 1 */ \
t0 = t0 ^ t2; /* 6 - 4 1 */ \
\
w0 = w0 << 16; /* 1 0 - - */ \
w0 = w0 & 0x00ff0000; /* - 0 - - */ \
w0 = w0 ^ t0; /* 6 0 4 1 */
#endif
/******************************************************************************
* Copyright (c) 2020, NEC Corporation.
*
* THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND.
*
*****************************************************************************/
/*
* SKINNY-128-384
*
* load * AC(c0 c1) ^ TK3
* calc AC(c0 c1) ^ TK2 -> store
* ART(TK2)
*
* number of rounds : 40 or 56
*/
#include "skinny.h"
#ifdef ___ENABLE_DWORD_CAST
/*
 * PERMUTATION_TK2() (64-bit variant): one TK2 key-schedule step.
 * Permutes the eight bytes in dw, applies the TK2 LFSR to every byte of dw,
 * XORs the result with the 64-bit word at *tk3 and stores it at *tk2, then
 * advances both pointers by two 64-bit words (16 bytes).
 * Uses dw, dt0, dt1, tk2 and tk3 from the expanding scope.
 */
#define PERMUTATION_TK2() \
\
/* permutation */ \
\
PERMUTATION() \
\
/* LFSR(for TK2) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x6 x5 x4 x3 x2 x1 x0 x7^x5) */ \
dw = ((dw << 1) & 0xfefefefefefefefe) ^ \
(((dw >> 7) ^ (dw >> 5)) & 0x0101010101010101); \
\
/* Load TK3 */ \
/* TK2^TK3^AC(c0 c1) */ \
/* store */ \
*tk2 = dw ^ *tk3; \
tk2 += 2; \
tk3 += 2;
#ifndef ___SKINNY_LOOP
void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys)
{
uint64_t* tk2; // used in MACRO
uint64_t* tk3; // used in MACRO
uint64_t dt0; // used in MACRO
uint64_t dt1; // used in MACRO
uint64_t dw;
// odd
// load master key
// load master key
dw = *(uint64_t*)&roundKeys[16];
tk2 = (uint64_t*)&roundKeys[64];
#ifndef ___NUM_OF_ROUNDS_56
tk3 = (uint64_t*)&roundKeys[384];
#else
tk3 = (uint64_t*)&roundKeys[512];
#endif
// 1st round
*tk2 = dw ^ *tk3;
tk2 += 2;
tk3 += 2;
// 3rd,5th, ... ,37th,39th round
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
#ifdef ___NUM_OF_ROUNDS_56
// 41th,43th, ... ,51th,53th round
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
#endif
// even
// load master key
dw = *(uint64_t*)&roundKeys[24];
tk2 = (uint64_t*)&roundKeys[72];
#ifndef ___NUM_OF_ROUNDS_56
tk3 = (uint64_t*)&roundKeys[392];
#else
tk3 = (uint64_t*)&roundKeys[520];
#endif
// 2nd,4th, ... ,54th,56th round
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
#ifdef ___NUM_OF_ROUNDS_56
// 42nd,44th, ... ,54th,56th round
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
#endif
}
#else /* ___SKINNY_LOOP */
/*
 * TK2 key schedule, 64-bit loop variant.
 *
 * For each half of the key material (roundKeys[16] for odd rounds,
 * roundKeys[24] for even rounds) the scheduled value is XORed with the
 * matching 8-byte entry of the TK3 area and stored in the area starting at
 * roundKeys[64].  Entries are 8 bytes wide at a 16-byte stride; even rounds
 * sit 8 bytes after the odd ones.
 */
void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys)
{
#ifdef ___NUM_OF_ROUNDS_56
    enum { TK3_BASE = 512, ODD_STEPS = 27, EVEN_STEPS = 28 };
#else
    enum { TK3_BASE = 384, ODD_STEPS = 19, EVEN_STEPS = 20 };
#endif
    /* tk2, tk3, dt0, dt1 and dw are referenced by PERMUTATION_TK2() */
    uint64_t* tk2;
    uint64_t* tk3;
    uint64_t dt0;
    uint64_t dt1;
    uint64_t dw;
    int step;

    /* odd rounds: the 1st round uses the key bytes as loaded */
    dw = *(uint64_t*)&roundKeys[16];
    tk2 = (uint64_t*)&roundKeys[64];
    tk3 = (uint64_t*)&roundKeys[TK3_BASE];
    *tk2 = dw ^ *tk3;
    tk2 += 2;
    tk3 += 2;
    /* 3rd, 5th, ... */
    for (step = 0; step < ODD_STEPS; step++) {
        PERMUTATION_TK2();
    }

    /* even rounds: 2nd, 4th, ... */
    dw = *(uint64_t*)&roundKeys[24];
    tk2 = (uint64_t*)&roundKeys[72];
    tk3 = (uint64_t*)&roundKeys[TK3_BASE + 8];
    for (step = 0; step < EVEN_STEPS; step++) {
        PERMUTATION_TK2();
    }
}
#endif /* ___SKINNY_LOOP */
#else /* ___ENABLE_DWORD_CAST */
/*
 * PERMUTATION_TK2() (32-bit variant): one TK2 key-schedule step.
 * Permutes the eight bytes held in w0/w1, applies the TK2 LFSR to every
 * byte of both words, XORs them with the two 32-bit words at tk3 and stores
 * the results at tk2.  Both pointers end up four 32-bit words (16 bytes)
 * further on.  Uses w0, w1, t0, t1, t2, tk2 and tk3 from the expanding scope.
 */
#define PERMUTATION_TK2() \
\
/* permutation */ \
\
PERMUTATION() \
\
/* LFSR(for TK2) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x6 x5 x4 x3 x2 x1 x0 x7^x5) */ \
w0 = ((w0 << 1) & 0xfefefefe) ^ \
(((w0 >> 7) ^ (w0 >> 5)) & 0x01010101); \
w1 = ((w1 << 1) & 0xfefefefe) ^ \
(((w1 >> 7) ^ (w1 >> 5)) & 0x01010101); \
\
/* Load TK3 */ \
/* TK2^TK3^AC(c0 c1) */ \
/* store */ \
*tk2++ = w0 ^ *tk3++; \
*tk2++ = w1 ^ *tk3++; \
tk2 += 2; \
tk3 += 2;
#ifndef ___SKINNY_LOOP
void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys)
{
uint32_t* tk2; // used in MACRO
uint32_t* tk3; // used in MACRO
uint32_t t0; // used in MACRO
uint32_t t1; // used in MACRO
uint32_t t2; // used in MACRO
uint32_t w0;
uint32_t w1;
// odd
// load master key
w0 = *(uint32_t*)&roundKeys[16];
w1 = *(uint32_t*)&roundKeys[20];
tk2 = (uint32_t*)&roundKeys[64];
#ifndef ___NUM_OF_ROUNDS_56
tk3 = (uint32_t*)&roundKeys[384];
#else
tk3 = (uint32_t*)&roundKeys[512];
#endif
// 1st round
*tk2++ = w0 ^ *tk3++;
*tk2++ = w1 ^ *tk3++;
tk2 += 2;
tk3 += 2;
// 3rd,5th, ... ,37th,39th round
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
#ifdef ___NUM_OF_ROUNDS_56
// 41th,43th, ... ,51th,53th round
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
#endif
// even
// load master key
w0 = *(uint32_t*)&roundKeys[24];
w1 = *(uint32_t*)&roundKeys[28];
tk2 = (uint32_t*)&roundKeys[72];
#ifndef ___NUM_OF_ROUNDS_56
tk3 = (uint32_t*)&roundKeys[392];
#else
tk3 = (uint32_t*)&roundKeys[520];
#endif
// 2nd,4th, ... ,54th,56th round
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
#ifdef ___NUM_OF_ROUNDS_56
// 42nd,44th, ... ,54th,56th round
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
PERMUTATION_TK2();
#endif
}
#else /* ___SKINNY_LOOP */
/*
 * TK2 key schedule, 32-bit loop variant.
 *
 * For each half of the key material (roundKeys[16..23] for odd rounds,
 * roundKeys[24..31] for even rounds) the scheduled value is XORed with the
 * matching 8-byte entry of the TK3 area and stored in the area starting at
 * roundKeys[64].  Entries are 8 bytes wide at a 16-byte stride; even rounds
 * sit 8 bytes after the odd ones.
 */
void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys)
{
#ifdef ___NUM_OF_ROUNDS_56
    enum { TK3_BASE = 512, ODD_STEPS = 27, EVEN_STEPS = 28 };
#else
    enum { TK3_BASE = 384, ODD_STEPS = 19, EVEN_STEPS = 20 };
#endif
    /* tk2, tk3, t0, t1, t2, w0 and w1 are referenced by PERMUTATION_TK2() */
    uint32_t* tk2;
    uint32_t* tk3;
    uint32_t t0;
    uint32_t t1;
    uint32_t t2;
    uint32_t w0;
    uint32_t w1;
    int step;

    /* odd rounds: the 1st round uses the key bytes as loaded */
    w0 = *(uint32_t*)&roundKeys[16];
    w1 = *(uint32_t*)&roundKeys[20];
    tk2 = (uint32_t*)&roundKeys[64];
    tk3 = (uint32_t*)&roundKeys[TK3_BASE];
    tk2[0] = w0 ^ tk3[0];
    tk2[1] = w1 ^ tk3[1];
    tk2 += 4;
    tk3 += 4;
    /* 3rd, 5th, ... */
    for (step = 0; step < ODD_STEPS; step++) {
        PERMUTATION_TK2();
    }

    /* even rounds: 2nd, 4th, ... */
    w0 = *(uint32_t*)&roundKeys[24];
    w1 = *(uint32_t*)&roundKeys[28];
    tk2 = (uint32_t*)&roundKeys[72];
    tk3 = (uint32_t*)&roundKeys[TK3_BASE + 8];
    for (step = 0; step < EVEN_STEPS; step++) {
        PERMUTATION_TK2();
    }
}
#endif /* ___SKINNY_LOOP */
#endif /* ___ENABLE_DWORD_CAST */
/******************************************************************************
* Copyright (c) 2020, NEC Corporation.
*
* THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND.
*
*****************************************************************************/
/*
* SKINNY-128-384
*
* AC(c0 c1) ^ TK3 -> store
* ART(TK3)
*
* number of rounds : 40 or 56
*/
#include "skinny.h"
#ifdef ___ENABLE_DWORD_CAST
/*
 * PERMUTATION_TK3(c0Val, c1Val) (64-bit variant): one TK3 key-schedule step.
 * Permutes the eight bytes in dw, applies the TK3 LFSR to every byte of dw,
 * then stores dw with c0Val XORed into byte 0 and c1Val XORed into byte 5
 * (shift by 40) at *tk3 and advances tk3 by two 64-bit words (16 bytes).
 * Uses dw, dt0, dt1 and tk3 from the expanding scope.
 */
#define PERMUTATION_TK3(c0Val, c1Val) \
\
/* permutation */ \
\
PERMUTATION() \
\
/* LFSR(for TK3) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x0^x6 x7 x6 x5 x4 x3 x2 x1) */ \
dw = ((dw >> 1) & 0x7f7f7f7f7f7f7f7f) ^ \
(((dw << 7) ^ (dw << 1)) & 0x8080808080808080); \
\
/* K3^AC(c0 c1) */ \
/* store */ \
dt0 = dw ^ c0Val; \
*tk3 = dt0 ^ ((uint64_t)c1Val << 40); \
tk3 += 2;
#ifndef ___SKINNY_LOOP
/*
 * TK3 key schedule, 64-bit fully unrolled variant.
 *
 * Reads the two 8-byte halves of the key material at roundKeys[32] and
 * roundKeys[40] and writes one 8-byte entry per round, with that round's
 * constants (c0, c1) XORed in, into the TK3 area (roundKeys[384..] for 40
 * rounds, roundKeys[512..] for 56 rounds).  Entries sit at a 16-byte
 * stride: odd rounds at offset 0, even rounds at offset 8.
 * The (c0, c1) pairs are hard-coded in the macro invocations below.
 */
void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys)
{
uint64_t *tk3;
uint64_t dt0; // used in MACRO
uint64_t dt1; // used in MACRO
uint64_t dw;
// odd
// load master key
dw = *(uint64_t*)&roundKeys[32];
#ifndef ___NUM_OF_ROUNDS_56
tk3 = (uint64_t*)&roundKeys[384];
#else
tk3 = (uint64_t*)&roundKeys[512];
#endif
// 1st round: key bytes as loaded, with constant c0 = 0x01 and c1 = 0
*tk3++ = dw ^ 0x01;
tk3 += 1;
// 3rd,5th, ... ,37th,39th round
PERMUTATION_TK3(0x7, 0x0);
PERMUTATION_TK3(0xf, 0x1);
PERMUTATION_TK3(0xd, 0x3);
PERMUTATION_TK3(0x7, 0x3);
PERMUTATION_TK3(0xe, 0x1);
PERMUTATION_TK3(0x9, 0x3);
PERMUTATION_TK3(0x7, 0x2);
PERMUTATION_TK3(0xd, 0x1);
PERMUTATION_TK3(0x5, 0x3);
PERMUTATION_TK3(0x6, 0x1);
PERMUTATION_TK3(0x8, 0x1);
PERMUTATION_TK3(0x1, 0x2);
PERMUTATION_TK3(0x5, 0x0);
PERMUTATION_TK3(0x7, 0x1);
PERMUTATION_TK3(0xc, 0x1);
PERMUTATION_TK3(0x1, 0x3);
PERMUTATION_TK3(0x6, 0x0);
PERMUTATION_TK3(0xb, 0x1);
PERMUTATION_TK3(0xd, 0x2);
#ifdef ___NUM_OF_ROUNDS_56
// 41st,43rd, ... ,53rd,55th round
PERMUTATION_TK3(0x4, 0x3);
PERMUTATION_TK3(0x2, 0x1);
PERMUTATION_TK3(0x8, 0x0);
PERMUTATION_TK3(0x2, 0x2);
PERMUTATION_TK3(0x9, 0x0);
PERMUTATION_TK3(0x6, 0x2);
PERMUTATION_TK3(0x9, 0x1);
PERMUTATION_TK3(0x5, 0x2);
#endif
// even
// load master key
dw = *(uint64_t*)&roundKeys[40];
#ifndef ___NUM_OF_ROUNDS_56
tk3 = (uint64_t*)&roundKeys[392];
#else
tk3 = (uint64_t*)&roundKeys[520];
#endif
// 2nd,4th, ... ,38th,40th round
PERMUTATION_TK3(0x3, 0x0);
PERMUTATION_TK3(0xf, 0x0);
PERMUTATION_TK3(0xe, 0x3);
PERMUTATION_TK3(0xb, 0x3);
PERMUTATION_TK3(0xf, 0x2);
PERMUTATION_TK3(0xc, 0x3);
PERMUTATION_TK3(0x3, 0x3);
PERMUTATION_TK3(0xe, 0x0);
PERMUTATION_TK3(0xa, 0x3);
PERMUTATION_TK3(0xb, 0x2);
PERMUTATION_TK3(0xc, 0x2);
PERMUTATION_TK3(0x0, 0x3);
PERMUTATION_TK3(0x2, 0x0);
PERMUTATION_TK3(0xb, 0x0);
PERMUTATION_TK3(0xe, 0x2);
PERMUTATION_TK3(0x8, 0x3);
PERMUTATION_TK3(0x3, 0x2);
PERMUTATION_TK3(0xd, 0x0);
PERMUTATION_TK3(0x6, 0x3);
PERMUTATION_TK3(0xa, 0x1);
#ifdef ___NUM_OF_ROUNDS_56
// 42nd,44th, ... ,54th,56th round
PERMUTATION_TK3(0x9, 0x2);
PERMUTATION_TK3(0x4, 0x2);
PERMUTATION_TK3(0x1, 0x1);
PERMUTATION_TK3(0x4, 0x0);
PERMUTATION_TK3(0x3, 0x1);
PERMUTATION_TK3(0xc, 0x0);
PERMUTATION_TK3(0x2, 0x3);
PERMUTATION_TK3(0xa, 0x0);
#endif
}
#else /* ___SKINNY_LOOP */
/*
 * TK3 key schedule, 64-bit loop variant.
 *
 * roundKeys - key material is read from roundKeys[32] (odd rounds) and
 *             roundKeys[40] (even rounds); one 8-byte entry per round is
 *             written to the TK3 area at a 16-byte stride, even rounds
 *             8 bytes after the odd ones.
 * pRC       - round-constant table read four bytes per pair of rounds:
 *             bytes 0/1 are (c0, c1) of the odd round, bytes 2/3 those of
 *             the even round.  The first group's odd entry is not read;
 *             the 1st round uses the literal constant 0x01.
 */
void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys, unsigned char *pRC)
{
#ifdef ___NUM_OF_ROUNDS_56
    enum { TK3_BASE = 512, ODD_STEPS = 27, EVEN_STEPS = 28 };
#else
    enum { TK3_BASE = 384, ODD_STEPS = 19, EVEN_STEPS = 20 };
#endif
    /* tk3, dt0, dt1 and dw are referenced by PERMUTATION_TK3() */
    uint64_t *tk3;
    uint64_t dt0;
    uint64_t dt1;
    uint64_t dw;
    uint64_t c0;
    uint64_t c1;
    int step;

    /* odd rounds: the 1st round uses the key bytes as loaded */
    dw = *(uint64_t*)&roundKeys[32];
    tk3 = (uint64_t*)&roundKeys[TK3_BASE];
    tk3[0] = dw ^ 0x01;
    tk3 += 2;
    /* 3rd, 5th, ... : constants start at the second 4-byte group */
    for (step = 1; step <= ODD_STEPS; step++) {
        c0 = pRC[4 * step];
        c1 = pRC[4 * step + 1];
        PERMUTATION_TK3(c0, c1);
    }

    /* even rounds: 2nd, 4th, ... */
    dw = *(uint64_t*)&roundKeys[40];
    tk3 = (uint64_t*)&roundKeys[TK3_BASE + 8];
    for (step = 0; step < EVEN_STEPS; step++) {
        c0 = pRC[4 * step + 2];
        c1 = pRC[4 * step + 3];
        PERMUTATION_TK3(c0, c1);
    }
}
#endif /* ___SKINNY_LOOP */
#else /* ___ENABLE_DWORD_CAST */
/*
 * PERMUTATION_TK3(c0Val, c1Val) (32-bit variant): one TK3 key-schedule step.
 * Permutes the eight bytes held in w0/w1, applies the TK3 LFSR to every
 * byte of both words, then stores w0 with c0Val XORed into its byte 0 and
 * w1 with c1Val XORed into its byte 1 (shift by 8) at tk3.  tk3 ends up
 * four 32-bit words (16 bytes) further on.
 * Uses w0, w1, t0, t1, t2 and tk3 from the expanding scope.
 */
#define PERMUTATION_TK3(c0Val, c1Val) \
\
/* permutation */ \
\
PERMUTATION() \
\
/* LFSR(for TK3) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x0^x6 x7 x6 x5 x4 x3 x2 x1) */ \
w0 = ((w0 >> 1) & 0x7f7f7f7f) ^ \
(((w0 << 7) ^ (w0 << 1)) & 0x80808080); \
w1 = ((w1 >> 1) & 0x7f7f7f7f) ^ \
(((w1 << 7) ^ (w1 << 1)) & 0x80808080); \
\
/* K3^AC(c0 c1) */ \
/* store */ \
*tk3++ = w0 ^ c0Val; \
*tk3++ = w1 ^ ((uint32_t)c1Val << 8); \
tk3 += 2;
#ifndef ___SKINNY_LOOP
/*
 * TK3 round-key schedule (fully unrolled, 32-bit accesses).
 *
 * roundKeys : key material buffer. Two 32-bit words are read from byte
 *             offsets 32/36 for the odd rounds and 40/44 for the even
 *             rounds. Output is written starting at byte offset 384
 *             (40-round build) or 512 (___NUM_OF_ROUNDS_56 build) for the
 *             odd rounds, and 8 bytes later (392 / 520) for the even rounds.
 *
 * Round 1 stores (w0 ^ 0x01, w1) directly; every following round is one
 * PERMUTATION_TK3 invocation whose two literals are that round's (c0, c1)
 * constants. t0/t1/t2 are not referenced here directly; they are declared
 * for the macro expansion.
 */
void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys)
{
uint32_t *tk3;
uint32_t t0; // used in MACRO
uint32_t t1; // used in MACRO
uint32_t t2; // used in MACRO
uint32_t w0;
uint32_t w1;
// odd
// load master key
w0 = *(uint32_t*)&roundKeys[32];
w1 = *(uint32_t*)&roundKeys[36];
#ifndef ___NUM_OF_ROUNDS_56
tk3 = (uint32_t*)&roundKeys[384];
#else
tk3 = (uint32_t*)&roundKeys[512];
#endif
// 1st round
*tk3++ = w0 ^ 0x01;
*tk3++ = w1;
// skip the two words that belong to the following even round
tk3 += 2;
// 3rd,5th, ... ,37th,39th round
PERMUTATION_TK3(0x7, 0x0);
PERMUTATION_TK3(0xf, 0x1);
PERMUTATION_TK3(0xd, 0x3);
PERMUTATION_TK3(0x7, 0x3);
PERMUTATION_TK3(0xe, 0x1);
PERMUTATION_TK3(0x9, 0x3);
PERMUTATION_TK3(0x7, 0x2);
PERMUTATION_TK3(0xd, 0x1);
PERMUTATION_TK3(0x5, 0x3);
PERMUTATION_TK3(0x6, 0x1);
PERMUTATION_TK3(0x8, 0x1);
PERMUTATION_TK3(0x1, 0x2);
PERMUTATION_TK3(0x5, 0x0);
PERMUTATION_TK3(0x7, 0x1);
PERMUTATION_TK3(0xc, 0x1);
PERMUTATION_TK3(0x1, 0x3);
PERMUTATION_TK3(0x6, 0x0);
PERMUTATION_TK3(0xb, 0x1);
PERMUTATION_TK3(0xd, 0x2);
#ifdef ___NUM_OF_ROUNDS_56
// 41st,43rd, ... ,53rd,55th round
PERMUTATION_TK3(0x4, 0x3);
PERMUTATION_TK3(0x2, 0x1);
PERMUTATION_TK3(0x8, 0x0);
PERMUTATION_TK3(0x2, 0x2);
PERMUTATION_TK3(0x9, 0x0);
PERMUTATION_TK3(0x6, 0x2);
PERMUTATION_TK3(0x9, 0x1);
PERMUTATION_TK3(0x5, 0x2);
#endif
// even
// load master key
w0 = *(uint32_t*)&roundKeys[40];
w1 = *(uint32_t*)&roundKeys[44];
#ifndef ___NUM_OF_ROUNDS_56
tk3 = (uint32_t*)&roundKeys[392];
#else
tk3 = (uint32_t*)&roundKeys[520];
#endif
// 2nd,4th, ... ,38th,40th round
PERMUTATION_TK3(0x3, 0x0);
PERMUTATION_TK3(0xf, 0x0);
PERMUTATION_TK3(0xe, 0x3);
PERMUTATION_TK3(0xb, 0x3);
PERMUTATION_TK3(0xf, 0x2);
PERMUTATION_TK3(0xc, 0x3);
PERMUTATION_TK3(0x3, 0x3);
PERMUTATION_TK3(0xe, 0x0);
PERMUTATION_TK3(0xa, 0x3);
PERMUTATION_TK3(0xb, 0x2);
PERMUTATION_TK3(0xc, 0x2);
PERMUTATION_TK3(0x0, 0x3);
PERMUTATION_TK3(0x2, 0x0);
PERMUTATION_TK3(0xb, 0x0);
PERMUTATION_TK3(0xe, 0x2);
PERMUTATION_TK3(0x8, 0x3);
PERMUTATION_TK3(0x3, 0x2);
PERMUTATION_TK3(0xd, 0x0);
PERMUTATION_TK3(0x6, 0x3);
PERMUTATION_TK3(0xa, 0x1);
#ifdef ___NUM_OF_ROUNDS_56
// 42nd,44th, ... ,54th,56th round
PERMUTATION_TK3(0x9, 0x2);
PERMUTATION_TK3(0x4, 0x2);
PERMUTATION_TK3(0x1, 0x1);
PERMUTATION_TK3(0x4, 0x0);
PERMUTATION_TK3(0x3, 0x1);
PERMUTATION_TK3(0xc, 0x0);
PERMUTATION_TK3(0x2, 0x3);
PERMUTATION_TK3(0xa, 0x0);
#endif
}
#else /* ___SKINNY_LOOP */
/*
 * TK3 round-key schedule (loop variant, 32-bit accesses).
 *
 * roundKeys : key material buffer. Two 32-bit words are read from byte
 *             offsets 32/36 (odd rounds) and 40/44 (even rounds); the
 *             generated values are written starting at byte offset 384
 *             (40-round build) or 512 (___NUM_OF_ROUNDS_56 build), with
 *             the even-round output starting 8 bytes after the odd-round
 *             output.
 * pRC       : round-constant bytes, read as (c0, c1) pairs with a stride
 *             of 4 bytes. The odd pass starts at pRC[4] (round 1 uses the
 *             literal 0x01 instead), the even pass starts at pRC[2].
 */
void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys, unsigned char *pRC)
{
#ifndef ___NUM_OF_ROUNDS_56
    const int tk3_offset = 384;
    const int odd_iters = 19;
    const int even_iters = 20;
#else
    const int tk3_offset = 512;
    const int odd_iters = 27;
    const int even_iters = 28;
#endif
    unsigned char *rc;
    uint32_t *tk3;
    uint32_t t0; // used in MACRO
    uint32_t t1; // used in MACRO
    uint32_t t2; // used in MACRO
    uint32_t w0;
    uint32_t w1;
    uint32_t c0;
    uint32_t c1;
    int n;

    /* ---- odd rounds ---- */

    // load master key
    w0 = *(uint32_t*)&roundKeys[32];
    w1 = *(uint32_t*)&roundKeys[36];
    tk3 = (uint32_t*)&roundKeys[tk3_offset];

    // 1st round: store, then skip the two words of the following even round
    tk3[0] = w0 ^ 0x01;
    tk3[1] = w1;
    tk3 += 4;

    // 3rd,5th, ...
    rc = pRC + 4;
    for (n = odd_iters; n > 0; n--) {
        c0 = rc[0];
        c1 = rc[1];
        rc += 4;
        PERMUTATION_TK3(c0, c1);
    }

    /* ---- even rounds ---- */

    // load master key
    w0 = *(uint32_t*)&roundKeys[40];
    w1 = *(uint32_t*)&roundKeys[44];
    tk3 = (uint32_t*)&roundKeys[tk3_offset + 8];

    // 2nd,4th, ...
    rc = pRC + 2;
    for (n = even_iters; n > 0; n--) {
        c0 = rc[0];
        c1 = rc[1];
        rc += 4;
        PERMUTATION_TK3(c0, c1);
    }
}
#endif /* ___SKINNY_LOOP */
#endif /* ___ENABLE_DWORD_CAST */
/******************************************************************************
* Copyright (c) 2020, NEC Corporation.
*
* THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND.
*
*****************************************************************************/
/*
* SKINNY-128-384
*
* ART(TK1) -> store
* load AC(c0 c1) ^ TK3 ^ TK2
* load TK1
* calc AC(c0 c1) ^ TK3 ^ TK2 ^ TK1 -> use at (AC->ART)
* SC->SR->(AC->ART)->MC
*
* number of rounds : 40 or 56
*/
#include "skinny.h"
/*
 * S-BOX
 *
 * 256-entry byte substitution table, indexed by the input byte.
 * Passed to Encrypt() as its `sbox` argument.
 */
unsigned char SBOX[]
= {
// Original
0x65, 0x4c, 0x6a, 0x42, 0x4b, 0x63, 0x43, 0x6b, 0x55, 0x75, 0x5a, 0x7a, 0x53, 0x73, 0x5b, 0x7b,
0x35, 0x8c, 0x3a, 0x81, 0x89, 0x33, 0x80, 0x3b, 0x95, 0x25, 0x98, 0x2a, 0x90, 0x23, 0x99, 0x2b,
0xe5, 0xcc, 0xe8, 0xc1, 0xc9, 0xe0, 0xc0, 0xe9, 0xd5, 0xf5, 0xd8, 0xf8, 0xd0, 0xf0, 0xd9, 0xf9,
0xa5, 0x1c, 0xa8, 0x12, 0x1b, 0xa0, 0x13, 0xa9, 0x05, 0xb5, 0x0a, 0xb8, 0x03, 0xb0, 0x0b, 0xb9,
0x32, 0x88, 0x3c, 0x85, 0x8d, 0x34, 0x84, 0x3d, 0x91, 0x22, 0x9c, 0x2c, 0x94, 0x24, 0x9d, 0x2d,
0x62, 0x4a, 0x6c, 0x45, 0x4d, 0x64, 0x44, 0x6d, 0x52, 0x72, 0x5c, 0x7c, 0x54, 0x74, 0x5d, 0x7d,
0xa1, 0x1a, 0xac, 0x15, 0x1d, 0xa4, 0x14, 0xad, 0x02, 0xb1, 0x0c, 0xbc, 0x04, 0xb4, 0x0d, 0xbd,
0xe1, 0xc8, 0xec, 0xc5, 0xcd, 0xe4, 0xc4, 0xed, 0xd1, 0xf1, 0xdc, 0xfc, 0xd4, 0xf4, 0xdd, 0xfd,
0x36, 0x8e, 0x38, 0x82, 0x8b, 0x30, 0x83, 0x39, 0x96, 0x26, 0x9a, 0x28, 0x93, 0x20, 0x9b, 0x29,
0x66, 0x4e, 0x68, 0x41, 0x49, 0x60, 0x40, 0x69, 0x56, 0x76, 0x58, 0x78, 0x50, 0x70, 0x59, 0x79,
0xa6, 0x1e, 0xaa, 0x11, 0x19, 0xa3, 0x10, 0xab, 0x06, 0xb6, 0x08, 0xba, 0x00, 0xb3, 0x09, 0xbb,
0xe6, 0xce, 0xea, 0xc2, 0xcb, 0xe3, 0xc3, 0xeb, 0xd6, 0xf6, 0xda, 0xfa, 0xd3, 0xf3, 0xdb, 0xfb,
0x31, 0x8a, 0x3e, 0x86, 0x8f, 0x37, 0x87, 0x3f, 0x92, 0x21, 0x9e, 0x2e, 0x97, 0x27, 0x9f, 0x2f,
0x61, 0x48, 0x6e, 0x46, 0x4f, 0x67, 0x47, 0x6f, 0x51, 0x71, 0x5e, 0x7e, 0x57, 0x77, 0x5f, 0x7f,
0xa2, 0x18, 0xae, 0x16, 0x1f, 0xa7, 0x17, 0xaf, 0x01, 0xb2, 0x0e, 0xbe, 0x07, 0xb7, 0x0f, 0xbf,
0xe2, 0xca, 0xee, 0xc6, 0xcf, 0xe7, 0xc7, 0xef, 0xd2, 0xf2, 0xde, 0xfe, 0xd7, 0xf7, 0xdf, 0xff,
};
/*
 * S-BOX ^ AC(c2)
 *
 * Same table as SBOX with the constant c2 (0x02) XORed into every entry,
 * so the substitution and the c2 constant addition cost a single lookup.
 * Passed to Encrypt() as its `sbox2` argument and used by SBOX_16 for the
 * first byte of its group.
 */
unsigned char SBOX2[]
= { // Original ^ c2(0x02)
0x67, 0x4e, 0x68, 0x40, 0x49, 0x61, 0x41, 0x69, 0x57, 0x77, 0x58, 0x78, 0x51, 0x71, 0x59, 0x79,
0x37, 0x8e, 0x38, 0x83, 0x8b, 0x31, 0x82, 0x39, 0x97, 0x27, 0x9a, 0x28, 0x92, 0x21, 0x9b, 0x29,
0xe7, 0xce, 0xea, 0xc3, 0xcb, 0xe2, 0xc2, 0xeb, 0xd7, 0xf7, 0xda, 0xfa, 0xd2, 0xf2, 0xdb, 0xfb,
0xa7, 0x1e, 0xaa, 0x10, 0x19, 0xa2, 0x11, 0xab, 0x07, 0xb7, 0x08, 0xba, 0x01, 0xb2, 0x09, 0xbb,
0x30, 0x8a, 0x3e, 0x87, 0x8f, 0x36, 0x86, 0x3f, 0x93, 0x20, 0x9e, 0x2e, 0x96, 0x26, 0x9f, 0x2f,
0x60, 0x48, 0x6e, 0x47, 0x4f, 0x66, 0x46, 0x6f, 0x50, 0x70, 0x5e, 0x7e, 0x56, 0x76, 0x5f, 0x7f,
0xa3, 0x18, 0xae, 0x17, 0x1f, 0xa6, 0x16, 0xaf, 0x00, 0xb3, 0x0e, 0xbe, 0x06, 0xb6, 0x0f, 0xbf,
0xe3, 0xca, 0xee, 0xc7, 0xcf, 0xe6, 0xc6, 0xef, 0xd3, 0xf3, 0xde, 0xfe, 0xd6, 0xf6, 0xdf, 0xff,
0x34, 0x8c, 0x3a, 0x80, 0x89, 0x32, 0x81, 0x3b, 0x94, 0x24, 0x98, 0x2a, 0x91, 0x22, 0x99, 0x2b,
0x64, 0x4c, 0x6a, 0x43, 0x4b, 0x62, 0x42, 0x6b, 0x54, 0x74, 0x5a, 0x7a, 0x52, 0x72, 0x5b, 0x7b,
0xa4, 0x1c, 0xa8, 0x13, 0x1b, 0xa1, 0x12, 0xa9, 0x04, 0xb4, 0x0a, 0xb8, 0x02, 0xb1, 0x0b, 0xb9,
0xe4, 0xcc, 0xe8, 0xc0, 0xc9, 0xe1, 0xc1, 0xe9, 0xd4, 0xf4, 0xd8, 0xf8, 0xd1, 0xf1, 0xd9, 0xf9,
0x33, 0x88, 0x3c, 0x84, 0x8d, 0x35, 0x85, 0x3d, 0x90, 0x23, 0x9c, 0x2c, 0x95, 0x25, 0x9d, 0x2d,
0x63, 0x4a, 0x6c, 0x44, 0x4d, 0x65, 0x45, 0x6d, 0x53, 0x73, 0x5c, 0x7c, 0x55, 0x75, 0x5d, 0x7d,
0xa0, 0x1a, 0xac, 0x14, 0x1d, 0xa5, 0x15, 0xad, 0x03, 0xb0, 0x0c, 0xbc, 0x05, 0xb5, 0x0d, 0xbd,
0xe0, 0xc8, 0xec, 0xc4, 0xcd, 0xe5, 0xc5, 0xed, 0xd0, 0xf0, 0xdc, 0xfc, 0xd5, 0xf5, 0xdd, 0xfd,
};
#ifdef ___SKINNY_LOOP
/*
 * Round Constants
 *
 * Two bytes per round, stored as (c0, c1): 80 bytes for the 40-round
 * build, 112 bytes when ___NUM_OF_ROUNDS_56 is defined. Only compiled in
 * the ___SKINNY_LOOP build, where it is passed to
 * RunEncryptionKeyScheduleTK3() as pRC.
 */
unsigned char RC[]
= {
0x01, 0x00, 0x03, 0x00, 0x07, 0x00, 0x0f, 0x00, 0x0f, 0x01, 0x0e, 0x03, 0x0d, 0x03, 0x0b, 0x03,
0x07, 0x03, 0x0f, 0x02, 0x0e, 0x01, 0x0c, 0x03, 0x09, 0x03, 0x03, 0x03, 0x07, 0x02, 0x0e, 0x00,
0x0d, 0x01, 0x0a, 0x03, 0x05, 0x03, 0x0b, 0x02, 0x06, 0x01, 0x0c, 0x02, 0x08, 0x01, 0x00, 0x03,
0x01, 0x02, 0x02, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x07, 0x01, 0x0e, 0x02, 0x0c, 0x01, 0x08, 0x03,
0x01, 0x03, 0x03, 0x02, 0x06, 0x00, 0x0d, 0x00, 0x0b, 0x01, 0x06, 0x03, 0x0d, 0x02, 0x0a, 0x01,
#ifdef ___NUM_OF_ROUNDS_56
0x04, 0x03, 0x09, 0x02, 0x02, 0x01, 0x04, 0x02, 0x08, 0x00, 0x01, 0x01, 0x02, 0x02, 0x04, 0x00,
0x09, 0x00, 0x03, 0x01, 0x06, 0x02, 0x0c, 0x00, 0x09, 0x01, 0x02, 0x03, 0x05, 0x02, 0x0a, 0x00,
#endif
};
#endif
/*
 * Prototypes for the routines the skinny_128_384_enc* entry points call.
 * RunEncryptionKeyScheduleTK3 takes the round-constant table as a second
 * argument only in the ___SKINNY_LOOP build.
 */
extern void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2);
extern void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys);
#ifdef ___SKINNY_LOOP
extern void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys, unsigned char *pRC);
#else
extern void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys);
#endif
/*
 * Encrypt one block after loading CNT, T and K into the round-key buffer
 * and running both the TK3 and TK2 key schedules.
 *
 * input        : block handed to Encrypt()
 * pskinny_ctrl : context; its roundKeys buffer is rewritten, and its
 *                func_skinny_128_384_enc pointer is switched to
 *                skinny_128_384_enc12_12 before returning
 * CNT          : 8 bytes are read (CNT[0..7])
 * T            : 16 bytes are read (T[0..15])
 * K            : 16 bytes are read (K[0..15])
 *
 * Each 8-byte half is stored as one directly copied 32-bit word followed
 * by one word assembled with pack_word() from bytes 7, 4, 5, 6 of the half.
 */
void skinny_128_384_enc123_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K)
{
    unsigned char *rk = pskinny_ctrl->roundKeys;
    uint32_t *words = (uint32_t*)rk;

    /* counter -> words 0..1 */
    words[0] = *(uint32_t*)CNT;
    pack_word(CNT[7], CNT[4], CNT[5], CNT[6], words[1]);

    /* T -> words 4..7 */
    words[4] = *(uint32_t*)T;
    pack_word(T[7], T[4], T[5], T[6], words[5]);
    words[6] = *(uint32_t*)(T + 8);
    pack_word(T[15], T[12], T[13], T[14], words[7]);

    /* K -> words 8..11 */
    words[8] = *(uint32_t*)K;
    pack_word(K[7], K[4], K[5], K[6], words[9]);
    words[10] = *(uint32_t*)(K + 8);
    pack_word(K[15], K[12], K[13], K[14], words[11]);

#ifdef ___SKINNY_LOOP
    RunEncryptionKeyScheduleTK3(rk, RC);
#else
    RunEncryptionKeyScheduleTK3(rk);
#endif
    RunEncryptionKeyScheduleTK2(rk);

    Encrypt(input, rk, SBOX, SBOX2);

    /* later calls through the context skip the TK3 schedule */
    pskinny_ctrl->func_skinny_128_384_enc = skinny_128_384_enc12_12;
}
/*
 * Encrypt one block after loading CNT and T into the round-key buffer and
 * running the TK2 key schedule only.
 *
 * RunEncryptionKeyScheduleTK3 is not called here, so whatever TK3-derived
 * data is already in pskinny_ctrl->roundKeys is used as is.
 * skinny_128_384_enc123_12 installs this function in the context after it
 * has run the TK3 schedule once.
 *
 * input        : block handed to Encrypt()
 * pskinny_ctrl : context whose roundKeys buffer is updated
 * CNT          : 8 bytes are read (CNT[0..7])
 * T            : 16 bytes are read (T[0..15])
 * K            : unused; kept so all variants share one signature
 */
void skinny_128_384_enc12_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K)
{
    (void)K;

    /*
     * roundKeys is passed to the key-schedule routines as unsigned char *,
     * so viewing it as 32-bit words needs an explicit cast (as in
     * skinny_128_384_enc123_12); without it the initialisation is an
     * incompatible-pointer-type constraint violation.
     */
    uint32_t *pt = (uint32_t*)&pskinny_ctrl->roundKeys[0];

    /* counter -> words 0..1 */
    pt[0] = *(uint32_t*)(&CNT[0]);
    pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]);

    /* T -> words 4..7 */
    pt[4] = *(uint32_t*)(&T[0]);
    pack_word(T[7], T[4], T[5], T[6], pt[5]);
    pt[6] = *(uint32_t*)(&T[8]);
    pack_word(T[15], T[12], T[13], T[14], pt[7]);

    RunEncryptionKeyScheduleTK2(pskinny_ctrl->roundKeys);
    Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2);
}
/*
 * Encrypt one block after loading only CNT into the round-key buffer.
 *
 * No key schedule is run here, so everything else already stored in
 * pskinny_ctrl->roundKeys is used as is.
 *
 * input        : block handed to Encrypt()
 * pskinny_ctrl : context whose roundKeys buffer is updated (words 0..1)
 * CNT          : 8 bytes are read (CNT[0..7])
 * T, K         : unused; kept so all variants share one signature
 */
extern void skinny_128_384_enc1_1 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K)
{
    (void)T;
    (void)K;

    /*
     * roundKeys is passed to Encrypt() as unsigned char *, so viewing it as
     * 32-bit words needs an explicit cast (as in skinny_128_384_enc123_12);
     * without it the initialisation is an incompatible-pointer-type
     * constraint violation.
     */
    uint32_t *pt = (uint32_t*)&pskinny_ctrl->roundKeys[0];

    /* counter -> words 0..1 */
    pt[0] = *(uint32_t*)(&CNT[0]);
    pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]);

    Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2);
}
/*
 * PERMUTATION_TK1()
 *
 * Reads the eight bytes roundKeys[0..7] and writes seven differently
 * permuted copies of them to roundKeys[8..63] (8 bytes each, back to back).
 * Together with the unpermuted bytes at roundKeys[0..7] this gives eight
 * consecutive 8-byte TK1 values that SKINNY_MAIN walks through via tk1.
 *
 * Expects `unsigned char *roundKeys` in the caller's scope. Expands to a
 * single brace-enclosed block, so its temporaries do not leak.
 */
#define PERMUTATION_TK1() \
\
/* permutation */ \
{ \
unsigned char tmp0 = roundKeys[0]; \
unsigned char tmp1 = roundKeys[1]; \
unsigned char tmp2 = roundKeys[2]; \
unsigned char tmp3 = roundKeys[3]; \
unsigned char tmp4 = roundKeys[4]; \
unsigned char tmp5 = roundKeys[5]; \
unsigned char tmp6 = roundKeys[6]; \
unsigned char tmp7 = roundKeys[7]; \
\
unsigned char* dst = &roundKeys[8]; \
\
/* 5 7 2 3 6 0 4 1 */ \
*dst++ = tmp1; \
*dst++ = tmp4; \
*dst++ = tmp0; \
*dst++ = tmp6; \
*dst++ = tmp3; \
*dst++ = tmp2; \
*dst++ = tmp7; \
*dst++ = tmp5; \
\
/* 2 5 0 6 7 1 3 4 */ \
*dst++ = tmp4; \
*dst++ = tmp3; \
*dst++ = tmp1; \
*dst++ = tmp7; \
*dst++ = tmp6; \
*dst++ = tmp0; \
*dst++ = tmp5; \
*dst++ = tmp2; \
\
/* 0 2 1 7 5 4 6 3 */ \
*dst++ = tmp3; \
*dst++ = tmp6; \
*dst++ = tmp4; \
*dst++ = tmp5; \
*dst++ = tmp7; \
*dst++ = tmp1; \
*dst++ = tmp2; \
*dst++ = tmp0; \
\
/* 1 0 4 5 2 3 7 6 */ \
*dst++ = tmp6; \
*dst++ = tmp7; \
*dst++ = tmp3; \
*dst++ = tmp2; \
*dst++ = tmp5; \
*dst++ = tmp4; \
*dst++ = tmp0; \
*dst++ = tmp1; \
\
/* 4 1 3 2 0 6 5 7 */ \
*dst++ = tmp7; \
*dst++ = tmp5; \
*dst++ = tmp6; \
*dst++ = tmp0; \
*dst++ = tmp2; \
*dst++ = tmp3; \
*dst++ = tmp1; \
*dst++ = tmp4; \
\
/* 3 4 6 0 1 7 2 5 */ \
*dst++ = tmp5; \
*dst++ = tmp2; \
*dst++ = tmp7; \
*dst++ = tmp1; \
*dst++ = tmp0; \
*dst++ = tmp6; \
*dst++ = tmp4; \
*dst++ = tmp3; \
\
/* 6 3 7 1 4 5 0 2 */ \
*dst++ = tmp2; \
*dst++ = tmp0; \
*dst++ = tmp5; \
*dst++ = tmp4; \
*dst++ = tmp1; \
*dst++ = tmp7; \
*dst++ = tmp3; \
*dst++ = tmp6; \
}
/*
 * SBOX_0(b0, b1, b2, b3)
 *
 * Substitutes four bytes through `sbox` and writes the results back in the
 * same positions (no rotation).
 * Each argument is both read and assigned, so it must be a byte lvalue.
 * Expects t0..t3 and `sbox` in the caller's scope.
 */
#define SBOX_0(b0, b1, b2, b3) \
\
t0 = sbox[b0]; \
t1 = sbox[b1]; \
t2 = sbox[b2]; \
t3 = sbox[b3]; \
\
b0 = (uint8_t)t0; \
b1 = (uint8_t)t1; \
b2 = (uint8_t)t2; \
b3 = (uint8_t)t3;
/*
 * SBOX_8(b0, b1, b2, b3)
 *
 * Substitutes four bytes through `sbox` and writes them back rotated by one
 * position: the result for b0 lands in b1, b1 in b2, b2 in b3, b3 in b0.
 * Each argument is both read and assigned, so it must be a byte lvalue.
 * Expects t0..t3 and `sbox` in the caller's scope.
 */
#define SBOX_8(b0, b1, b2, b3) \
\
t0 = sbox[b0]; \
t1 = sbox[b1]; \
t2 = sbox[b2]; \
t3 = sbox[b3]; \
\
b0 = (uint8_t)t3; \
b1 = (uint8_t)t0; \
b2 = (uint8_t)t1; \
b3 = (uint8_t)t2;
/*
 * SBOX_16(b0, b1, b2, b3)
 *
 * Substitutes four bytes and writes them back rotated by two positions:
 * the result for b0 lands in b2, b1 in b3, b2 in b0, b3 in b1.
 * b0 alone is looked up in `sbox2` (the table with c2 folded in); the other
 * three use `sbox`.
 * Each argument is both read and assigned, so it must be a byte lvalue.
 * Expects t0..t3, `sbox` and `sbox2` in the caller's scope.
 */
#define SBOX_16(b0, b1, b2, b3) \
\
t0 = sbox2[b0]; /* AC(c2) */ \
t1 = sbox[b1]; \
t2 = sbox[b2]; \
t3 = sbox[b3]; \
\
b0 = (uint8_t)t2; \
b1 = (uint8_t)t3; \
b2 = (uint8_t)t0; \
b3 = (uint8_t)t1;
/*
 * SBOX_24(b0, b1, b2, b3)
 *
 * Substitutes four bytes through `sbox` and writes them back rotated by
 * three positions: the result for b0 lands in b3, b1 in b0, b2 in b1,
 * b3 in b2.
 * Each argument is both read and assigned, so it must be a byte lvalue.
 * Expects t0..t3 and `sbox` in the caller's scope.
 */
#define SBOX_24(b0, b1, b2, b3) \
\
t0 = sbox[b0]; \
t1 = sbox[b1]; \
t2 = sbox[b2]; \
t3 = sbox[b3]; \
\
b0 = (uint8_t)t1; \
b1 = (uint8_t)t2; \
b2 = (uint8_t)t3; \
b3 = (uint8_t)t0;
#ifdef ___ENABLE_DWORD_CAST
/*
 * SKINNY_MAIN() -- 64-bit variant (___ENABLE_DWORD_CAST).
 *
 * Expands to one brace-enclosed block that performs two rounds on the
 * 16-byte `block`:
 *   - odd round : table lookups via SBOX_0/8/16/24, then XOR of one 64-bit
 *                 word from tk1 and one from tk2 into block[0..7], then the
 *                 column mixing step;
 *   - even round: the same, except that only tk2 is XORed in.
 * tk1 therefore advances by one 64-bit word and tk2 by two per expansion.
 *
 * Expects in the caller's scope: unsigned char *block, sbox, sbox2;
 * uint64_t *tk1, *tk2; uint64_t t0, t1, t2, t3.
 * NOTE(review): the `>> 32` splits combined with the 32-bit stores appear
 * to assume little-endian byte order -- confirm for the target platforms.
 */
#define SKINNY_MAIN() \
{ \
\
/* odd */ \
\
/* LUT(with ShiftRows & AC(c2))*/ \
\
SBOX_0( block[0], block[1], block[2], block[3]); \
SBOX_8( block[4], block[5], block[6], block[7]); \
SBOX_16(block[8], block[9], block[10], block[11]); \
SBOX_24(block[12], block[13], block[14], block[15]); \
\
/* TK1^TK2^TK3^AC(c0 c1) */ \
\
t1 = *(uint64_t*)&block[0]; \
t1 ^= *tk1++; \
t1 ^= *tk2++; \
\
/* MC */ \
\
t2 = *(uint64_t*)&block[8]; \
t0 = t2 >> 32; \
\
/* 0^2 */ \
t3 = t1 ^ t2; \
\
/* 1^2 */ \
t2 = (t1 >> 32) ^ t2; \
\
/* 0^2^3 */ \
t0 = t0 ^ t3; \
\
*(uint32_t*)&block[0] = (uint32_t)t0; \
*(uint32_t*)&block[4] = (uint32_t)t1; \
*(uint32_t*)&block[8] = (uint32_t)t2; \
*(uint32_t*)&block[12] = (uint32_t)t3; \
\
/* even */ \
\
/* LUT(with ShiftRows & AC(c2))*/ \
\
SBOX_0( block[0], block[1], block[2], block[3]); \
SBOX_8( block[4], block[5], block[6], block[7]); \
SBOX_16(block[8], block[9], block[10], block[11]); \
SBOX_24(block[12], block[13], block[14], block[15]); \
\
/* TK2^TK3^AC(c0 c1) */ \
\
t1 = *(uint64_t*)&block[0]; \
t1 ^= *tk2++; \
\
/* MC */ \
\
t2 = *(uint64_t*)&block[8]; \
t0 = t2 >> 32; \
\
/* 0^2 */ \
t3 = t1 ^ t2; \
\
/* 1^2 */ \
t2 = (t1 >> 32) ^ t2; \
\
/* 0^2^3 */ \
t0 = t0 ^ t3; \
\
*(uint32_t*)&block[0] = (uint32_t)t0; \
*(uint32_t*)&block[4] = (uint32_t)t1; \
*(uint32_t*)&block[8] = (uint32_t)t2; \
*(uint32_t*)&block[12] = (uint32_t)t3; \
}
#ifndef ___SKINNY_LOOP
/*
 * Encrypt one 16-byte block in place (fully unrolled, 64-bit accesses).
 *
 * block     : 16-byte state, overwritten with the result
 * roundKeys : round-key buffer; bytes 8..63 are rewritten by
 *             PERMUTATION_TK1() from bytes 0..7, tk2 walks forward from
 *             byte offset 64
 * sbox      : substitution table used by the SBOX_* macros
 * sbox2     : substitution table used by SBOX_16 for its first byte
 *
 * Each SKINNY_MAIN() expansion performs two rounds. tk1 is reset to the
 * start of roundKeys after every eight expansions (16 rounds), which is
 * how far the values prepared by PERMUTATION_TK1() reach.
 */
void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2)
{
uint64_t *tk1;
uint64_t *tk2;
uint64_t t0; // used in MACRO
uint64_t t1; // used in MACRO
uint64_t t2; // used in MACRO
uint64_t t3; // used in MACRO
// TK1
PERMUTATION_TK1();
// SB+AC+ShR+MC
tk2 = (uint64_t*)&roundKeys[64];
tk1 = (uint64_t*)&roundKeys[0];
// 1st, ...,16th round
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
tk1 = (uint64_t*)&roundKeys[0];
// 17th, ...,32nd round
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
tk1 = (uint64_t*)&roundKeys[0];
// 33rd, ...,40th round
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
#ifdef ___NUM_OF_ROUNDS_56
// 41st, ...,48th round (tk1 keeps advancing from the previous group)
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
tk1 = (uint64_t*)&roundKeys[0];
// 49th, ... ,56th round
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
#endif
}
#else /* ___SKINNY_LOOP */
/*
 * Encrypt one 16-byte block in place (loop variant, 64-bit accesses).
 *
 * block     : 16-byte state, overwritten with the result
 * roundKeys : round-key buffer; bytes 8..63 are rewritten by
 *             PERMUTATION_TK1() from bytes 0..7, tk2 walks forward from
 *             byte offset 64
 * sbox      : substitution table used by the SBOX_* macros
 * sbox2     : substitution table used by SBOX_16 for its first byte
 *
 * Each SKINNY_MAIN() expansion performs two rounds. The rounds are run in
 * passes: tk1 is rewound to the start of roundKeys at the beginning of each
 * pass; full passes run eight expansions, the final pass runs four.
 */
void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2)
{
#ifndef ___NUM_OF_ROUNDS_56
    const int full_passes = 2;   /* 2 * 16 + 8 = 40 rounds */
#else
    const int full_passes = 3;   /* 3 * 16 + 8 = 56 rounds */
#endif
    uint64_t *tk1;
    uint64_t *tk2;
    uint64_t t0, t1, t2, t3; // names are fixed by the macros
    int pass;
    int step;

    // TK1
    PERMUTATION_TK1();

    // SB+AC+ShR+MC
    tk2 = (uint64_t*)&roundKeys[64];

    for (pass = 0; pass <= full_passes; pass++) {
        /* the last pass is the short one */
        const int steps = (pass < full_passes) ? 8 : 4;

        tk1 = (uint64_t*)&roundKeys[0];
        for (step = 0; step < steps; step++) {
            SKINNY_MAIN();
        }
    }
}
#endif /* ___SKINNY_LOOP */
#else /* ___ENABLE_DWORD_CAST */
/*
 * SKINNY_MAIN() -- 32-bit variant.
 *
 * Expands to one brace-enclosed block that performs two rounds on the
 * 16-byte `block`:
 *   - odd round : table lookups via SBOX_0/8/16/24, then XOR of two 32-bit
 *                 words from tk1 and two from tk2 into block[0..7], then the
 *                 column mixing step;
 *   - even round: the same, except that only tk2 is XORed in.
 * tk1 therefore advances by two 32-bit words and tk2 by four per expansion.
 *
 * Expects in the caller's scope: unsigned char *block, sbox, sbox2;
 * uint32_t *tk1, *tk2; uint32_t t0, t1, t2, t3, t4.
 */
#define SKINNY_MAIN() \
{ \
\
/* odd */ \
\
/* LUT(with ShiftRows & AC(c2))*/ \
\
SBOX_0( block[0], block[1], block[2], block[3]); \
SBOX_8( block[4], block[5], block[6], block[7]); \
SBOX_16(block[8], block[9], block[10], block[11]); \
SBOX_24(block[12], block[13], block[14], block[15]); \
\
/* TK1^TK2^TK3^AC(c0 c1) */ \
\
t1 = *(uint32_t*)&block[0]; \
t0 = *(uint32_t*)&block[4]; \
t1 ^= *tk1++; \
t1 ^= *tk2++; \
t0 ^= *tk1++; \
t0 ^= *tk2++; \
\
/* MC */ \
\
t2 = *(uint32_t*)&block[8]; \
t4 = *(uint32_t*)&block[12]; \
\
/* 0^2 */ \
t3 = t1 ^ t2; \
\
/* 1^2 */ \
t2 = t0 ^ t2; \
\
/* 0^2^3 */ \
t0 = t3 ^ t4; \
\
*(uint32_t*)&block[0] = t0; \
*(uint32_t*)&block[4] = t1; \
*(uint32_t*)&block[8] = t2; \
*(uint32_t*)&block[12] = t3; \
\
/* even */ \
\
/* LUT(with ShiftRows & AC(c2))*/ \
\
SBOX_0( block[0], block[1], block[2], block[3]); \
SBOX_8( block[4], block[5], block[6], block[7]); \
SBOX_16(block[8], block[9], block[10], block[11]); \
SBOX_24(block[12], block[13], block[14], block[15]); \
\
/* TK2^TK3^AC(c0 c1) */ \
\
t1 = *(uint32_t*)&block[0]; \
t0 = *(uint32_t*)&block[4]; \
t1 ^= *tk2++; \
t0 ^= *tk2++; \
\
/* MC */ \
\
t2 = *(uint32_t*)&block[8]; \
t4 = *(uint32_t*)&block[12]; \
\
/* 0^2 */ \
t3 = t1 ^ t2; \
\
/* 1^2 */ \
t2 = t0 ^ t2; \
\
/* 0^2^3 */ \
t0 = t3 ^ t4; \
\
*(uint32_t*)&block[0] = t0; \
*(uint32_t*)&block[4] = t1; \
*(uint32_t*)&block[8] = t2; \
*(uint32_t*)&block[12] = t3; \
}
#ifndef ___SKINNY_LOOP
/*
 * Encrypt one 16-byte block in place (fully unrolled, 32-bit accesses).
 *
 * block     : 16-byte state, overwritten with the result
 * roundKeys : round-key buffer; bytes 8..63 are rewritten by
 *             PERMUTATION_TK1() from bytes 0..7, tk2 walks forward from
 *             byte offset 64
 * sbox      : substitution table used by the SBOX_* macros
 * sbox2     : substitution table used by SBOX_16 for its first byte
 *
 * Each SKINNY_MAIN() expansion performs two rounds. tk1 is reset to the
 * start of roundKeys after every eight expansions (16 rounds), which is
 * how far the values prepared by PERMUTATION_TK1() reach.
 */
void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2)
{
uint32_t *tk1;
uint32_t *tk2;
uint32_t t0; // used in MACRO
uint32_t t1; // used in MACRO
uint32_t t2; // used in MACRO
uint32_t t3; // used in MACRO
uint32_t t4; // used in MACRO
// TK1
PERMUTATION_TK1();
// SB+AC+ShR+MC
tk2 = (uint32_t*)&roundKeys[64];
tk1 = (uint32_t*)&roundKeys[0];
// 1st, ...,16th round
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
tk1 = (uint32_t*)&roundKeys[0];
// 17th, ...,32nd round
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
tk1 = (uint32_t*)&roundKeys[0];
// 33rd, ...,40th round
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
#ifdef ___NUM_OF_ROUNDS_56
// 41st, ...,48th round (tk1 keeps advancing from the previous group)
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
tk1 = (uint32_t*)&roundKeys[0];
// 49th, ... ,56th round
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
SKINNY_MAIN();
#endif
}
#else /* ___SKINNY_LOOP */
/*
 * Encrypt one 16-byte block in place (loop variant, 32-bit accesses).
 *
 * block     : 16-byte state, overwritten with the result
 * roundKeys : round-key buffer; bytes 8..63 are rewritten by
 *             PERMUTATION_TK1() from bytes 0..7, tk2 walks forward from
 *             byte offset 64
 * sbox      : substitution table used by the SBOX_* macros
 * sbox2     : substitution table used by SBOX_16 for its first byte
 *
 * Each SKINNY_MAIN() expansion performs two rounds. The rounds are run in
 * passes: tk1 is rewound to the start of roundKeys at the beginning of each
 * pass; full passes run eight expansions, the final pass runs four.
 */
void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2)
{
#ifndef ___NUM_OF_ROUNDS_56
    const int full_passes = 2;   /* 2 * 16 + 8 = 40 rounds */
#else
    const int full_passes = 3;   /* 3 * 16 + 8 = 56 rounds */
#endif
    uint32_t *tk1;
    uint32_t *tk2;
    uint32_t t0, t1, t2, t3, t4; // names are fixed by the macros
    int pass;
    int step;

    // TK1
    PERMUTATION_TK1();

    // SB+AC+ShR+MC
    tk2 = (uint32_t*)&roundKeys[64];

    for (pass = 0; pass <= full_passes; pass++) {
        /* the last pass is the short one */
        const int steps = (pass < full_passes) ? 8 : 4;

        tk1 = (uint32_t*)&roundKeys[0];
        for (step = 0; step < steps; step++) {
            SKINNY_MAIN();
        }
    }
}
#endif /* ___SKINNY_LOOP */
#endif /* ___ENABLE_DWORD_CAST */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment