diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/api.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/api.h new file mode 100644 index 0000000..a4aa567 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/core.c b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/core.c new file mode 100644 index 0000000..1f75093 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/core.c @@ -0,0 +1,123 @@ +#include "core.h" + +void process_data(state* s, unsigned char* out, const unsigned char* in, + unsigned long long len, u8 mode) { + u32_2 t0, t1; + u64 tmp0, tmp1; + u64 i; + + while (len >= RATE) { + tmp0 = U64BIG(*(u64*)in); + t0 = to_bit_interleaving(tmp0); + s->x0.e ^= t0.e; + s->x0.o ^= t0.o; + tmp1 = U64BIG(*(u64*)(in + 8)); + t1 = to_bit_interleaving(tmp1); + s->x1.e ^= t1.e; + s->x1.o ^= t1.o; + if (mode != ASCON_AD) { + tmp0 = from_bit_interleaving(s->x0); + *(u64*)out = U64BIG(tmp0); + tmp1 = from_bit_interleaving(s->x1); + *(u64*)(out + 8) = U64BIG(tmp1); + } + if (mode == ASCON_DEC) { + s->x0 = t0; + s->x1 = t1; + } + P(s, PB_ROUNDS); + in += RATE; + out += RATE; + len -= RATE; + } + + tmp0 = 0; + tmp1 = 0; + for (i = 0; i < len; ++i, ++in) + if (i < 8) + tmp0 ^= INS_BYTE64(*in, i); + else + tmp1 ^= INS_BYTE64(*in, i % 8); + in -= len; + if (len < 8) + tmp0 ^= INS_BYTE64(0x80, len); + else + tmp1 ^= INS_BYTE64(0x80, len % 8); + t0 = to_bit_interleaving(tmp0); + s->x0.e ^= t0.e; + s->x0.o ^= t0.o; + t1 = to_bit_interleaving(tmp1); + s->x1.e ^= t1.e; + s->x1.o ^= t1.o; + if (mode != ASCON_AD) { + tmp0 = from_bit_interleaving(s->x0); + tmp1 = from_bit_interleaving(s->x1); + for (i = 0; i < len; ++i, ++out) + if (i < 8) + *out = EXT_BYTE64(tmp0, i); + else + *out = EXT_BYTE64(tmp1, i % 8); + } + if (mode == ASCON_DEC) { + for (i = 0; i < len; ++i, ++in) + if (i < 8) { + tmp0 &= ~INS_BYTE64(0xff, i); + tmp0 |= INS_BYTE64(*in, i); + } else { + tmp1 &= ~INS_BYTE64(0xff, i % 8); + tmp1 |= INS_BYTE64(*in, i % 8); + } + s->x0 = to_bit_interleaving(tmp0); + s->x1 = to_bit_interleaving(tmp1); + } +} + +void ascon_core(state* s, unsigned char* out, const unsigned char* in, + unsigned long long tlen, const unsigned char* ad, + unsigned long long adlen, const unsigned char* npub, + const unsigned char* k, u8 mode) { + u32_2 K0, K1, N0, N1; + + // load key and nonce + K0 = to_bit_interleaving(U64BIG(*(u64*)k)); + K1 = to_bit_interleaving(U64BIG(*(u64*)(k + 8))); + N0 = to_bit_interleaving(U64BIG(*(u64*)npub)); + N1 = to_bit_interleaving(U64BIG(*(u64*)(npub + 8))); + + // initialization + s->x0 = to_bit_interleaving(IV); + s->x1.o = K0.o; + s->x1.e = K0.e; + s->x2.e = K1.e; + s->x2.o = K1.o; + s->x3.e = N0.e; + s->x3.o = N0.o; + s->x4.e = N1.e; + s->x4.o = N1.o; + P(s, PA_ROUNDS); + s->x3.e ^= K0.e; + s->x3.o ^= K0.o; + s->x4.e ^= K1.e; + s->x4.o ^= K1.o; + + // process associated data + if (adlen) { + process_data(s, (void*)0, ad, adlen, ASCON_AD); + P(s, PB_ROUNDS); + } + s->x4.e ^= 1; + + // process plaintext/ciphertext + process_data(s, out, in, tlen, mode); + + // finalization + s->x2.e ^= K0.e; + s->x2.o ^= K0.o; + s->x3.e ^= K1.e; + s->x3.o ^= K1.o; + P(s, PA_ROUNDS); + s->x3.e ^= K0.e; + s->x3.o ^= K0.o; + s->x4.e ^= K1.e; + s->x4.o ^= K1.o; +} diff --git 
a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/core.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/core.h new file mode 100644 index 0000000..4a5330f --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/core.h @@ -0,0 +1,27 @@ +#ifndef CORE_H_ +#define CORE_H_ + +#include "api.h" +#include "endian.h" +#include "permutations.h" + +#define ASCON_AD 0 +#define ASCON_ENC 1 +#define ASCON_DEC 2 + +#define RATE (128 / 8) +#define PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define IV \ + ((u64)(8 * (CRYPTO_KEYBYTES)) << 56 | (u64)(8 * (RATE)) << 48 | \ + (u64)(PA_ROUNDS) << 40 | (u64)(PB_ROUNDS) << 32) + +void process_data(state* s, unsigned char* out, const unsigned char* in, + unsigned long long len, u8 mode); + +void ascon_core(state* s, unsigned char* out, const unsigned char* in, + unsigned long long tlen, const unsigned char* ad, + unsigned long long adlen, const unsigned char* npub, + const unsigned char* k, u8 mode); + +#endif // CORE_H_ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/decrypt.c b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/decrypt.c new file mode 100644 index 0000000..7e9dd1a --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/decrypt.c @@ -0,0 +1,32 @@ +#include "core.h" + +int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen, + unsigned char* nsec, const unsigned char* c, + unsigned long long clen, const unsigned char* ad, + unsigned long long adlen, const unsigned char* npub, + const unsigned char* k) { + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + + state s; + u32_2 t0, t1; + (void)nsec; + + // set plaintext size + *mlen = clen - CRYPTO_ABYTES; + + ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DEC); + + // verify tag (should be constant time, check compiler output) + t0 = to_bit_interleaving(U64BIG(*(u64*)(c + *mlen))); + t1 = to_bit_interleaving(U64BIG(*(u64*)(c + *mlen + 8))); + if (((s.x3.e ^ t0.e) | (s.x3.o ^ t0.o) | (s.x4.e ^ t1.e) | (s.x4.o ^ t1.o)) != + 0) { + *mlen = 0; + return -1; + } + + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/encrypt.c b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/encrypt.c new file mode 100644 index 0000000..b5dc587 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/encrypt.c @@ -0,0 +1,24 @@ +#include "core.h" + +int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen, + const unsigned char* m, unsigned long long mlen, + const unsigned char* ad, unsigned long long adlen, + const unsigned char* nsec, const unsigned char* npub, + const unsigned char* k) { + state s; + u64 tmp0, tmp1; + (void)nsec; + + // set ciphertext size + *clen = mlen + CRYPTO_ABYTES; + + ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENC); + + // set tag + tmp0 = from_bit_interleaving(s.x3); + *(u64*)(c + mlen) = U64BIG(tmp0); + tmp1 = from_bit_interleaving(s.x4); + *(u64*)(c + mlen + 8) = U64BIG(tmp1); + + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/endian.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/endian.h new file mode 100644 index 0000000..b4d18f5 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/endian.h @@ -0,0 +1,29 @@ +#ifndef ENDIAN_H_ +#define ENDIAN_H_ + +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + +// macros for big endian machines +#define U64BIG(x) (x) +#define U32BIG(x) (x) 
+#define U16BIG(x) (x) + +#elif defined(_MSC_VER) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + +// macros for little endian machines +#define U64BIG(x) \ + ((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) | \ + (((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) | \ + (((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) | \ + (((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56)) +#define U32BIG(x) \ + ((((x)&0x000000FF) << 24) | (((x)&0x0000FF00) << 8) | \ + (((x)&0x00FF0000) >> 8) | (((x)&0xFF000000) >> 24)) +#define U16BIG(x) ((((x)&0x00FF) << 8) | (((x)&0xFF00) >> 8)) + +#else +#error "ascon byte order macros not defined in endian.h" +#endif + +#endif // ENDIAN_H_ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/implementors b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/implementors new file mode 100644 index 0000000..b110c1a --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/implementors @@ -0,0 +1,2 @@ +Christoph Dobraunig +Martin Schläffer diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/permutations.c b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/permutations.c new file mode 100644 index 0000000..bc47f5f --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/permutations.c @@ -0,0 +1,50 @@ +#include "permutations.h" + +static const u8 constants[][2] = { + {0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9}, {0x6, 0xc}, {0x3, 0xc}, + {0x6, 0x9}, {0x3, 0x9}, {0xc, 0x6}, {0x9, 0x6}, {0xc, 0x3}, {0x9, 0x3}}; + +// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 +u32_2 to_bit_interleaving(u64 in) { + u32 hi = (in) >> 32; + u32 lo = (u32)(in); + u32 r0, r1; + u32_2 out; + r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); + r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); + r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); + r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); + r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); + r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); + r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); + r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); + out.e = (lo & 0x0000FFFF) | (hi << 16); + out.o = (lo >> 16) | (hi & 0xFFFF0000); + return out; +} + +// Credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 +u64 from_bit_interleaving(u32_2 in) { + u32 lo = (in.e & 0x0000FFFF) | (in.o << 16); + u32 hi = (in.e >> 16) | (in.o & 0xFFFF0000); + u32 r0, r1; + u64 out; + r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); + r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); + r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); + r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); + r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); + r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); + r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); + r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); + out = (u64)hi << 32 | lo; + return out; +} + +void P(state *p, u8 rounds) { + state s = *p; + u32_2 t0, t1, t2, t3, t4; + u32 i, start = START_ROUND(rounds); + for (i = start; i < 12; i++) ROUND(constants[i][0], constants[i][1]); + *p = s; +} diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/permutations.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/permutations.h new file mode 100644 index 0000000..bc643ce --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/permutations.h @@ -0,0 +1,71 @@ +#ifndef PERMUTATIONS_H_ +#define PERMUTATIONS_H_ + +typedef unsigned char u8; +typedef unsigned int u32; +typedef unsigned long long u64; + +typedef struct { + u32 e; + u32 o; +} u32_2; + +typedef struct { + u32_2 x0; + u32_2 x1; + u32_2 x2; + u32_2 x3; + u32_2 x4; +} state; + +#define EXT_BYTE64(x, n) ((u8)((u64)(x) >> (8 * (7 - (n))))) +#define INS_BYTE64(x, n) ((u64)(x) << (8 * (7 - (n)))) +#define ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n)))) +#define START_ROUND(x) (12 - (x)) + +// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 +u32_2 to_bit_interleaving(u64 in); + +// Credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 +u64 from_bit_interleaving(u32_2 in); + +/* clang-format off */ +#define ROUND(C_e, C_o) \ + do { \ + /* round constant */ \ + s.x2.e ^= C_e; s.x2.o ^= C_o; \ + /* s-box layer */ \ + s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \ + s.x4.e ^= s.x3.e; s.x4.o ^= s.x3.o; \ + s.x2.e ^= s.x1.e; s.x2.o ^= s.x1.o; \ + t0.e = s.x0.e; t0.o = s.x0.o; \ + t4.e = s.x4.e; t4.o = s.x4.o; \ + t3.e = s.x3.e; t3.o = s.x3.o; \ + t1.e = s.x1.e; t1.o = s.x1.o; \ + t2.e = s.x2.e; t2.o = s.x2.o; \ + s.x0.e = t0.e ^ (~t1.e & t2.e); s.x0.o = t0.o ^ (~t1.o & t2.o); \ + s.x2.e = t2.e ^ (~t3.e & t4.e); s.x2.o = t2.o ^ (~t3.o & t4.o); \ + s.x4.e = t4.e ^ (~t0.e & t1.e); s.x4.o = t4.o ^ (~t0.o & t1.o); \ + s.x1.e = t1.e ^ (~t2.e & t3.e); s.x1.o = t1.o ^ (~t2.o & t3.o); \ + s.x3.e = t3.e ^ (~t4.e & t0.e); s.x3.o = t3.o ^ (~t4.o & t0.o); \ + s.x1.e ^= s.x0.e; s.x1.o ^= s.x0.o; \ + s.x3.e ^= s.x2.e; s.x3.o ^= s.x2.o; \ + s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \ + /* linear layer */ \ + t0.e = s.x0.e ^ ROTR32(s.x0.o, 4); t0.o = s.x0.o ^ ROTR32(s.x0.e, 5); \ + t1.e = s.x1.e ^ ROTR32(s.x1.e, 11); t1.o = s.x1.o ^ ROTR32(s.x1.o, 11); \ + t2.e = s.x2.e ^ ROTR32(s.x2.o, 2); t2.o = s.x2.o ^ ROTR32(s.x2.e, 3); \ + t3.e = s.x3.e ^ ROTR32(s.x3.o, 3); t3.o = s.x3.o ^ ROTR32(s.x3.e, 4); \ + t4.e = s.x4.e ^ ROTR32(s.x4.e, 17); t4.o = s.x4.o ^ ROTR32(s.x4.o, 17); \ + s.x0.e ^= ROTR32(t0.o, 9); s.x0.o ^= ROTR32(t0.e, 10); \ + s.x1.e ^= ROTR32(t1.o, 19); s.x1.o ^= ROTR32(t1.e, 20); \ + s.x2.e ^= t2.o; s.x2.o ^= ROTR32(t2.e, 1); \ + s.x3.e ^= ROTR32(t3.e, 5); s.x3.o ^= ROTR32(t3.o, 5); \ + s.x4.e ^= ROTR32(t4.o, 3); s.x4.o ^= ROTR32(t4.e, 4); \ + s.x2.e = ~s.x2.e; s.x2.o = ~s.x2.o; \ + } while(0) +/* clang-format on */ + +void P(state *p, u8 rounds); + +#endif // PERMUTATIONS_H_ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/api.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/api.h new file mode 100644 index 0000000..a4aa567 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/core.c b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/core.c new file mode 100644 index 0000000..676f436 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/core.c @@ -0,0 +1,78 @@ +#include "core.h" + +void process_data(state* s, unsigned char* out, const unsigned char* in, + unsigned long long len, u8 mode) { + u64* x; + u64 i; + + while (len >= RATE) { + s->x0 ^= U64BIG(*(u64*)in); + s->x1 ^= U64BIG(*(u64*)(in + 8)); + if (mode != ASCON_AD) { + *(u64*)out = U64BIG(s->x0); + *(u64*)(out + 8) = U64BIG(s->x1); + } + if (mode == ASCON_DEC) { + s->x0 = U64BIG(*((u64*)in)); + s->x1 = U64BIG(*((u64*)(in + 8))); + } + P(s, PB_ROUNDS); + in += RATE; + out += RATE; + len -= RATE; + } + + for (i = 0; i < len; ++i, ++out, ++in) { + if (i < 8) + x = &(s->x0); + else + x = &(s->x1); + *x ^= INS_BYTE64(*in, i % 8); + if (mode != ASCON_AD) *out = EXT_BYTE64(*x, i % 8); + if (mode == ASCON_DEC) { + *x &= ~INS_BYTE64(0xff, i % 8); + *x |= INS_BYTE64(*in, i % 8); + } + } + if (len < 8) + s->x0 ^= INS_BYTE64(0x80, len); + else + s->x1 ^= INS_BYTE64(0x80, len % 8); +} + +void ascon_core(state* s, unsigned char* out, const unsigned char* in, + unsigned long long tlen, const unsigned char* ad, + unsigned 
long long adlen, const unsigned char* npub, + const unsigned char* k, u8 mode) { + const u64 K0 = U64BIG(*(u64*)k); + const u64 K1 = U64BIG(*(u64*)(k + 8)); + const u64 N0 = U64BIG(*(u64*)npub); + const u64 N1 = U64BIG(*(u64*)(npub + 8)); + + // initialization + s->x0 = IV; + s->x1 = K0; + s->x2 = K1; + s->x3 = N0; + s->x4 = N1; + P(s, PA_ROUNDS); + s->x3 ^= K0; + s->x4 ^= K1; + + // process associated data + if (adlen) { + process_data(s, (void*)0, ad, adlen, ASCON_AD); + P(s, PB_ROUNDS); + } + s->x4 ^= 1; + + // process plaintext/ciphertext + process_data(s, out, in, tlen, mode); + + // finalization + s->x2 ^= K0; + s->x3 ^= K1; + P(s, PA_ROUNDS); + s->x3 ^= K0; + s->x4 ^= K1; +} diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/core.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/core.h new file mode 100644 index 0000000..4a5330f --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/core.h @@ -0,0 +1,27 @@ +#ifndef CORE_H_ +#define CORE_H_ + +#include "api.h" +#include "endian.h" +#include "permutations.h" + +#define ASCON_AD 0 +#define ASCON_ENC 1 +#define ASCON_DEC 2 + +#define RATE (128 / 8) +#define PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define IV \ + ((u64)(8 * (CRYPTO_KEYBYTES)) << 56 | (u64)(8 * (RATE)) << 48 | \ + (u64)(PA_ROUNDS) << 40 | (u64)(PB_ROUNDS) << 32) + +void process_data(state* s, unsigned char* out, const unsigned char* in, + unsigned long long len, u8 mode); + +void ascon_core(state* s, unsigned char* out, const unsigned char* in, + unsigned long long tlen, const unsigned char* ad, + unsigned long long adlen, const unsigned char* npub, + const unsigned char* k, u8 mode); + +#endif // CORE_H_ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/decrypt.c b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/decrypt.c new file mode 100644 index 0000000..0cde81e --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/decrypt.c @@ -0,0 +1,29 @@ +#include "core.h" + +int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen, + unsigned char* nsec, const unsigned char* c, + unsigned long long clen, const unsigned char* ad, + unsigned long long adlen, const unsigned char* npub, + const unsigned char* k) { + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + + state s; + (void)nsec; + + // set plaintext size + *mlen = clen - CRYPTO_ABYTES; + + ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DEC); + + // verify tag (should be constant time, check compiler output) + if (((s.x3 ^ U64BIG(*(u64*)(c + *mlen))) | + (s.x4 ^ U64BIG(*(u64*)(c + *mlen + 8)))) != 0) { + *mlen = 0; + return -1; + } + + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/encrypt.c b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/encrypt.c new file mode 100644 index 0000000..5961c60 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/encrypt.c @@ -0,0 +1,21 @@ +#include "core.h" + +int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen, + const unsigned char* m, unsigned long long mlen, + const unsigned char* ad, unsigned long long adlen, + const unsigned char* nsec, const unsigned char* npub, + const unsigned char* k) { + state s; + (void)nsec; + + // set ciphertext size + *clen = mlen + CRYPTO_ABYTES; + + ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENC); + + // set tag + *(u64*)(c + mlen) = U64BIG(s.x3); + *(u64*)(c + mlen + 8) = U64BIG(s.x4); + + return 0; +} 
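/*
 * Minimal round-trip sketch (not part of the patch above): a hypothetical
 * test harness assuming api.h, core.c, encrypt.c, decrypt.c and
 * permutations.c from the opt64_lowsize directory are compiled together.
 * It only exercises the SUPERCOP-style crypto_aead API declared in those
 * files; the all-zero key/nonce and the message below are arbitrary demo
 * values, not official known-answer-test vectors.
 */
#include <stdio.h>
#include <string.h>

#include "api.h"

int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen,
                        const unsigned char* m, unsigned long long mlen,
                        const unsigned char* ad, unsigned long long adlen,
                        const unsigned char* nsec, const unsigned char* npub,
                        const unsigned char* k);
int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen,
                        unsigned char* nsec, const unsigned char* c,
                        unsigned long long clen, const unsigned char* ad,
                        unsigned long long adlen, const unsigned char* npub,
                        const unsigned char* k);

int main(void) {
  unsigned char k[CRYPTO_KEYBYTES] = {0};     /* demo key: all zero */
  unsigned char npub[CRYPTO_NPUBBYTES] = {0}; /* demo nonce: all zero */
  unsigned char m[16] = "ascon-128a test";    /* 15 bytes + NUL */
  unsigned char ad[4] = "AD";                 /* short associated data */
  unsigned char c[sizeof m + CRYPTO_ABYTES];  /* ciphertext + 16-byte tag */
  unsigned char m2[sizeof m];
  unsigned long long clen, m2len;

  /* encrypt, then decrypt and verify the tag and the recovered plaintext */
  crypto_aead_encrypt(c, &clen, m, sizeof m, ad, sizeof ad, NULL, npub, k);
  if (crypto_aead_decrypt(m2, &m2len, NULL, c, clen, ad, sizeof ad, npub,
                          k) != 0 ||
      m2len != sizeof m || memcmp(m, m2, sizeof m) != 0) {
    puts("round trip failed");
    return 1;
  }
  puts("round trip ok");
  return 0;
}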
diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/endian.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/endian.h new file mode 100644 index 0000000..b4d18f5 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/endian.h @@ -0,0 +1,29 @@ +#ifndef ENDIAN_H_ +#define ENDIAN_H_ + +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + +// macros for big endian machines +#define U64BIG(x) (x) +#define U32BIG(x) (x) +#define U16BIG(x) (x) + +#elif defined(_MSC_VER) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + +// macros for little endian machines +#define U64BIG(x) \ + ((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) | \ + (((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) | \ + (((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) | \ + (((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56)) +#define U32BIG(x) \ + ((((x)&0x000000FF) << 24) | (((x)&0x0000FF00) << 8) | \ + (((x)&0x00FF0000) >> 8) | (((x)&0xFF000000) >> 24)) +#define U16BIG(x) ((((x)&0x00FF) << 8) | (((x)&0xFF00) >> 8)) + +#else +#error "ascon byte order macros not defined in endian.h" +#endif + +#endif // ENDIAN_H_ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/implementors b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/implementors new file mode 100644 index 0000000..b110c1a --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/implementors @@ -0,0 +1,2 @@ +Christoph Dobraunig +Martin Schläffer diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/permutations.c b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/permutations.c new file mode 100644 index 0000000..9aaf9d1 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/permutations.c @@ -0,0 +1,8 @@ +#include "permutations.h" + +void P(state *p, u8 rounds) { + state s = *p; + u8 i, start = START_CONSTANT(rounds); + for (i = start; i > 0x4a; i -= 0x0f) ROUND(i); + *p = s; +} diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/permutations.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/permutations.h new file mode 100644 index 0000000..7143e82 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/permutations.h @@ -0,0 +1,66 @@ +#ifndef PERMUTATIONS_H_ +#define PERMUTATIONS_H_ + +typedef unsigned char u8; +typedef unsigned long long u64; + +typedef struct { + u64 x0, x1, x2, x3, x4; +} state; + +#define EXT_BYTE64(x, n) ((u8)((u64)(x) >> (8 * (7 - (n))))) +#define INS_BYTE64(x, n) ((u64)(x) << (8 * (7 - (n)))) +#define ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) +#define START_CONSTANT(x) (((0xf - (12 - (x))) << 4) | (12 - (x))) + +#define ROUND(C) \ + do { \ + state t; \ + s.x2 ^= C; \ + s.x0 ^= s.x4; \ + s.x4 ^= s.x3; \ + s.x2 ^= s.x1; \ + t.x0 = s.x0; \ + t.x4 = s.x4; \ + t.x3 = s.x3; \ + t.x1 = s.x1; \ + t.x2 = s.x2; \ + s.x0 = t.x0 ^ ((~t.x1) & t.x2); \ + s.x2 = t.x2 ^ ((~t.x3) & t.x4); \ + s.x4 = t.x4 ^ ((~t.x0) & t.x1); \ + s.x1 = t.x1 ^ ((~t.x2) & t.x3); \ + s.x3 = t.x3 ^ ((~t.x4) & t.x0); \ + s.x1 ^= s.x0; \ + t.x1 = s.x1; \ + s.x1 = ROTR64(s.x1, 39); \ + s.x3 ^= s.x2; \ + t.x2 = s.x2; \ + s.x2 = ROTR64(s.x2, 1); \ + t.x4 = s.x4; \ + t.x2 ^= s.x2; \ + s.x2 = ROTR64(s.x2, 6 - 1); \ + t.x3 = s.x3; \ + t.x1 ^= s.x1; \ + s.x3 = ROTR64(s.x3, 10); \ + s.x0 ^= s.x4; \ + 
s.x4 = ROTR64(s.x4, 7); \ + t.x3 ^= s.x3; \ + s.x2 ^= t.x2; \ + s.x1 = ROTR64(s.x1, 61 - 39); \ + t.x0 = s.x0; \ + s.x2 = ~s.x2; \ + s.x3 = ROTR64(s.x3, 17 - 10); \ + t.x4 ^= s.x4; \ + s.x4 = ROTR64(s.x4, 41 - 7); \ + s.x3 ^= t.x3; \ + s.x1 ^= t.x1; \ + s.x0 = ROTR64(s.x0, 19); \ + s.x4 ^= t.x4; \ + t.x0 ^= s.x0; \ + s.x0 = ROTR64(s.x0, 28 - 19); \ + s.x0 ^= t.x0; \ + } while (0) + +void P(state *p, u8 rounds); + +#endif // PERMUTATIONS_H_ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/api.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/api.h new file mode 100644 index 0000000..a4aa567 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/core.c b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/core.c new file mode 100644 index 0000000..79db104 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/core.c @@ -0,0 +1,93 @@ +#include "core.h" + +void process_data(state* s, unsigned char* out, const unsigned char* in, + unsigned long long len, u8 mode) { + u32_2 t0; + u64 tmp0; + u64 i; + + while (len >= RATE) { + tmp0 = U64BIG(*(u64*)in); + t0 = to_bit_interleaving(tmp0); + s->x0.e ^= t0.e; + s->x0.o ^= t0.o; + if (mode != ASCON_AD) { + tmp0 = from_bit_interleaving(s->x0); + *(u64*)out = U64BIG(tmp0); + } + if (mode == ASCON_DEC) s->x0 = t0; + P(s, PB_ROUNDS); + in += RATE; + out += RATE; + len -= RATE; + } + + tmp0 = 0; + for (i = 0; i < len; ++i, ++in) tmp0 |= INS_BYTE64(*in, i); + in -= len; + tmp0 |= INS_BYTE64(0x80, len); + t0 = to_bit_interleaving(tmp0); + s->x0.e ^= t0.e; + s->x0.o ^= t0.o; + if (mode != ASCON_AD) { + tmp0 = from_bit_interleaving(s->x0); + for (i = 0; i < len; ++i, ++out) *out = EXT_BYTE64(tmp0, i); + } + if (mode == ASCON_DEC) { + for (i = 0; i < len; ++i, ++in) { + tmp0 &= ~INS_BYTE64(0xff, i); + tmp0 |= INS_BYTE64(*in, i); + } + s->x0 = to_bit_interleaving(tmp0); + } +} + +void ascon_core(state* s, unsigned char* out, const unsigned char* in, + unsigned long long tlen, const unsigned char* ad, + unsigned long long adlen, const unsigned char* npub, + const unsigned char* k, u8 mode) { + u32_2 K0, K1, N0, N1; + + // load key and nonce + K0 = to_bit_interleaving(U64BIG(*(u64*)k)); + K1 = to_bit_interleaving(U64BIG(*(u64*)(k + 8))); + N0 = to_bit_interleaving(U64BIG(*(u64*)npub)); + N1 = to_bit_interleaving(U64BIG(*(u64*)(npub + 8))); + + // initialization + s->x0 = to_bit_interleaving(IV); + s->x1.o = K0.o; + s->x1.e = K0.e; + s->x2.e = K1.e; + s->x2.o = K1.o; + s->x3.e = N0.e; + s->x3.o = N0.o; + s->x4.e = N1.e; + s->x4.o = N1.o; + P(s, PA_ROUNDS); + s->x3.e ^= K0.e; + s->x3.o ^= K0.o; + s->x4.e ^= K1.e; + s->x4.o ^= K1.o; + + // process associated data + if (adlen) { + process_data(s, (void*)0, ad, adlen, ASCON_AD); + P(s, PB_ROUNDS); + } + s->x4.e ^= 1; + + // process plaintext/ciphertext + process_data(s, out, in, tlen, mode); + + // finalization + s->x1.e ^= K0.e; + s->x1.o ^= K0.o; + s->x2.e ^= K1.e; + s->x2.o ^= K1.o; + P(s, PA_ROUNDS); + s->x3.e ^= K0.e; + s->x3.o ^= K0.o; + s->x4.e ^= K1.e; + s->x4.o ^= K1.o; +} diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/core.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/core.h new file mode 100644 index 0000000..90076c1 --- /dev/null +++ 
b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/core.h @@ -0,0 +1,27 @@ +#ifndef CORE_H_ +#define CORE_H_ + +#include "api.h" +#include "endian.h" +#include "permutations.h" + +#define ASCON_AD 0 +#define ASCON_ENC 1 +#define ASCON_DEC 2 + +#define RATE (64 / 8) +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define IV \ + ((u64)(8 * (CRYPTO_KEYBYTES)) << 56 | (u64)(8 * (RATE)) << 48 | \ + (u64)(PA_ROUNDS) << 40 | (u64)(PB_ROUNDS) << 32) + +void process_data(state* s, unsigned char* out, const unsigned char* in, + unsigned long long len, u8 mode); + +void ascon_core(state* s, unsigned char* out, const unsigned char* in, + unsigned long long tlen, const unsigned char* ad, + unsigned long long adlen, const unsigned char* npub, + const unsigned char* k, u8 mode); + +#endif // CORE_H_ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/decrypt.c b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/decrypt.c new file mode 100644 index 0000000..7e9dd1a --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/decrypt.c @@ -0,0 +1,32 @@ +#include "core.h" + +int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen, + unsigned char* nsec, const unsigned char* c, + unsigned long long clen, const unsigned char* ad, + unsigned long long adlen, const unsigned char* npub, + const unsigned char* k) { + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + + state s; + u32_2 t0, t1; + (void)nsec; + + // set plaintext size + *mlen = clen - CRYPTO_ABYTES; + + ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DEC); + + // verify tag (should be constant time, check compiler output) + t0 = to_bit_interleaving(U64BIG(*(u64*)(c + *mlen))); + t1 = to_bit_interleaving(U64BIG(*(u64*)(c + *mlen + 8))); + if (((s.x3.e ^ t0.e) | (s.x3.o ^ t0.o) | (s.x4.e ^ t1.e) | (s.x4.o ^ t1.o)) != + 0) { + *mlen = 0; + return -1; + } + + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/encrypt.c b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/encrypt.c new file mode 100644 index 0000000..b5dc587 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/encrypt.c @@ -0,0 +1,24 @@ +#include "core.h" + +int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen, + const unsigned char* m, unsigned long long mlen, + const unsigned char* ad, unsigned long long adlen, + const unsigned char* nsec, const unsigned char* npub, + const unsigned char* k) { + state s; + u64 tmp0, tmp1; + (void)nsec; + + // set ciphertext size + *clen = mlen + CRYPTO_ABYTES; + + ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENC); + + // set tag + tmp0 = from_bit_interleaving(s.x3); + *(u64*)(c + mlen) = U64BIG(tmp0); + tmp1 = from_bit_interleaving(s.x4); + *(u64*)(c + mlen + 8) = U64BIG(tmp1); + + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/endian.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/endian.h new file mode 100644 index 0000000..b4d18f5 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/endian.h @@ -0,0 +1,29 @@ +#ifndef ENDIAN_H_ +#define ENDIAN_H_ + +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + +// macros for big endian machines +#define U64BIG(x) (x) +#define U32BIG(x) (x) +#define U16BIG(x) (x) + +#elif defined(_MSC_VER) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + +// macros for little endian machines +#define U64BIG(x) \ + 
((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) | \ + (((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) | \ + (((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) | \ + (((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56)) +#define U32BIG(x) \ + ((((x)&0x000000FF) << 24) | (((x)&0x0000FF00) << 8) | \ + (((x)&0x00FF0000) >> 8) | (((x)&0xFF000000) >> 24)) +#define U16BIG(x) ((((x)&0x00FF) << 8) | (((x)&0xFF00) >> 8)) + +#else +#error "ascon byte order macros not defined in endian.h" +#endif + +#endif // ENDIAN_H_ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/implementors b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/implementors new file mode 100644 index 0000000..b110c1a --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/implementors @@ -0,0 +1,2 @@ +Christoph Dobraunig +Martin Schläffer diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/permutations.c b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/permutations.c new file mode 100644 index 0000000..bc47f5f --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/permutations.c @@ -0,0 +1,50 @@ +#include "permutations.h" + +static const u8 constants[][2] = { + {0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9}, {0x6, 0xc}, {0x3, 0xc}, + {0x6, 0x9}, {0x3, 0x9}, {0xc, 0x6}, {0x9, 0x6}, {0xc, 0x3}, {0x9, 0x3}}; + +// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 +u32_2 to_bit_interleaving(u64 in) { + u32 hi = (in) >> 32; + u32 lo = (u32)(in); + u32 r0, r1; + u32_2 out; + r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); + r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); + r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); + r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); + r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); + r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); + r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); + r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); + out.e = (lo & 0x0000FFFF) | (hi << 16); + out.o = (lo >> 16) | (hi & 0xFFFF0000); + return out; +} + +// Credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 +u64 from_bit_interleaving(u32_2 in) { + u32 lo = (in.e & 0x0000FFFF) | (in.o << 16); + u32 hi = (in.e >> 16) | (in.o & 0xFFFF0000); + u32 r0, r1; + u64 out; + r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); + r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); + r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); + r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); + r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); + r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); + r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); + r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); + out = (u64)hi << 32 | lo; + return out; +} + +void P(state *p, u8 rounds) { + state s = *p; + u32_2 t0, t1, t2, t3, t4; + u32 i, start = START_ROUND(rounds); + for (i = start; i < 12; i++) ROUND(constants[i][0], constants[i][1]); + *p = s; +} diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/permutations.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/permutations.h new file mode 100644 index 0000000..bc643ce --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/permutations.h @@ -0,0 +1,71 @@ +#ifndef PERMUTATIONS_H_ +#define PERMUTATIONS_H_ + +typedef unsigned char u8; +typedef unsigned int u32; +typedef unsigned long long u64; + +typedef struct { + u32 e; + u32 o; +} u32_2; + +typedef struct { + u32_2 x0; + u32_2 x1; + u32_2 x2; + u32_2 x3; + u32_2 x4; +} state; + +#define EXT_BYTE64(x, n) ((u8)((u64)(x) >> (8 * (7 - (n))))) +#define INS_BYTE64(x, n) ((u64)(x) << (8 * (7 - (n)))) +#define ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n)))) +#define START_ROUND(x) (12 - (x)) + +// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 +u32_2 to_bit_interleaving(u64 in); + +// Credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 +u64 from_bit_interleaving(u32_2 in); + +/* clang-format off */ +#define ROUND(C_e, C_o) \ + do { \ + /* round constant */ \ + s.x2.e ^= C_e; s.x2.o ^= C_o; \ + /* s-box layer */ \ + s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \ + s.x4.e ^= s.x3.e; s.x4.o ^= s.x3.o; \ + s.x2.e ^= s.x1.e; s.x2.o ^= s.x1.o; \ + t0.e = s.x0.e; t0.o = s.x0.o; \ + t4.e = s.x4.e; t4.o = s.x4.o; \ + t3.e = s.x3.e; t3.o = s.x3.o; \ + t1.e = s.x1.e; t1.o = s.x1.o; \ + t2.e = s.x2.e; t2.o = s.x2.o; \ + s.x0.e = t0.e ^ (~t1.e & t2.e); s.x0.o = t0.o ^ (~t1.o & t2.o); \ + s.x2.e = t2.e ^ (~t3.e & t4.e); s.x2.o = t2.o ^ (~t3.o & t4.o); \ + s.x4.e = t4.e ^ (~t0.e & t1.e); s.x4.o = t4.o ^ (~t0.o & t1.o); \ + s.x1.e = t1.e ^ (~t2.e & t3.e); s.x1.o = t1.o ^ (~t2.o & t3.o); \ + s.x3.e = t3.e ^ (~t4.e & t0.e); s.x3.o = t3.o ^ (~t4.o & t0.o); \ + s.x1.e ^= s.x0.e; s.x1.o ^= s.x0.o; \ + s.x3.e ^= s.x2.e; s.x3.o ^= s.x2.o; \ + s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \ + /* linear layer */ \ + t0.e = s.x0.e ^ ROTR32(s.x0.o, 4); t0.o = s.x0.o ^ ROTR32(s.x0.e, 5); \ + t1.e = s.x1.e ^ ROTR32(s.x1.e, 11); t1.o = s.x1.o ^ ROTR32(s.x1.o, 11); \ + t2.e = s.x2.e ^ ROTR32(s.x2.o, 2); t2.o = s.x2.o ^ ROTR32(s.x2.e, 3); \ + t3.e = s.x3.e ^ ROTR32(s.x3.o, 3); t3.o = s.x3.o ^ ROTR32(s.x3.e, 4); \ + t4.e = s.x4.e ^ ROTR32(s.x4.e, 17); t4.o = s.x4.o ^ ROTR32(s.x4.o, 17); \ + s.x0.e ^= ROTR32(t0.o, 9); s.x0.o ^= ROTR32(t0.e, 10); \ + s.x1.e ^= ROTR32(t1.o, 19); s.x1.o ^= ROTR32(t1.e, 20); \ + s.x2.e ^= t2.o; s.x2.o ^= ROTR32(t2.e, 1); \ + s.x3.e ^= ROTR32(t3.e, 5); s.x3.o ^= ROTR32(t3.o, 5); \ + s.x4.e ^= ROTR32(t4.o, 3); s.x4.o ^= ROTR32(t4.e, 4); \ + s.x2.e = ~s.x2.e; s.x2.o = ~s.x2.o; \ + } while(0) +/* clang-format on */ + +void P(state *p, u8 rounds); + +#endif // PERMUTATIONS_H_ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/api.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/api.h new file mode 100644 index 0000000..a4aa567 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/core.c b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/core.c new file mode 100644 index 0000000..48cac3d --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/core.c @@ -0,0 +1,64 @@ +#include "core.h" + +void process_data(state* s, unsigned char* out, const unsigned char* in, + unsigned long long len, u8 mode) { + u64 i; + + while (len >= RATE) { + s->x0 ^= U64BIG(*(u64*)in); + if (mode != ASCON_AD) *(u64*)out = U64BIG(s->x0); + if (mode == ASCON_DEC) s->x0 = U64BIG(*((u64*)in)); + P(s, PB_ROUNDS); + in += RATE; + out += RATE; + len -= RATE; + } + + for (i = 0; i < len; ++i, ++out, ++in) { + s->x0 ^= INS_BYTE64(*in, i); + + if (mode != ASCON_AD) *out = EXT_BYTE64(s->x0, i); + if (mode == ASCON_DEC) { + s->x0 &= ~INS_BYTE64(0xff, i); + s->x0 |= INS_BYTE64(*in, i); + } + } + s->x0 ^= INS_BYTE64(0x80, len); +} + +void ascon_core(state* s, unsigned char* out, const unsigned char* in, + unsigned long long tlen, const unsigned char* ad, + unsigned long long adlen, const unsigned char* npub, + const unsigned char* k, u8 mode) { + const u64 K0 = U64BIG(*(u64*)k); + const u64 K1 = U64BIG(*(u64*)(k + 8)); + const u64 N0 = U64BIG(*(u64*)npub); + const u64 N1 = U64BIG(*(u64*)(npub + 8)); + + // 
initialization + s->x0 = IV; + s->x1 = K0; + s->x2 = K1; + s->x3 = N0; + s->x4 = N1; + P(s, PA_ROUNDS); + s->x3 ^= K0; + s->x4 ^= K1; + + // process associated data + if (adlen) { + process_data(s, (void*)0, ad, adlen, ASCON_AD); + P(s, PB_ROUNDS); + } + s->x4 ^= 1; + + // process plaintext/ciphertext + process_data(s, out, in, tlen, mode); + + // finalization + s->x1 ^= K0; + s->x2 ^= K1; + P(s, PA_ROUNDS); + s->x3 ^= K0; + s->x4 ^= K1; +} diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/core.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/core.h new file mode 100644 index 0000000..90076c1 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/core.h @@ -0,0 +1,27 @@ +#ifndef CORE_H_ +#define CORE_H_ + +#include "api.h" +#include "endian.h" +#include "permutations.h" + +#define ASCON_AD 0 +#define ASCON_ENC 1 +#define ASCON_DEC 2 + +#define RATE (64 / 8) +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define IV \ + ((u64)(8 * (CRYPTO_KEYBYTES)) << 56 | (u64)(8 * (RATE)) << 48 | \ + (u64)(PA_ROUNDS) << 40 | (u64)(PB_ROUNDS) << 32) + +void process_data(state* s, unsigned char* out, const unsigned char* in, + unsigned long long len, u8 mode); + +void ascon_core(state* s, unsigned char* out, const unsigned char* in, + unsigned long long tlen, const unsigned char* ad, + unsigned long long adlen, const unsigned char* npub, + const unsigned char* k, u8 mode); + +#endif // CORE_H_ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/decrypt.c b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/decrypt.c new file mode 100644 index 0000000..0cde81e --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/decrypt.c @@ -0,0 +1,29 @@ +#include "core.h" + +int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen, + unsigned char* nsec, const unsigned char* c, + unsigned long long clen, const unsigned char* ad, + unsigned long long adlen, const unsigned char* npub, + const unsigned char* k) { + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + + state s; + (void)nsec; + + // set plaintext size + *mlen = clen - CRYPTO_ABYTES; + + ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DEC); + + // verify tag (should be constant time, check compiler output) + if (((s.x3 ^ U64BIG(*(u64*)(c + *mlen))) | + (s.x4 ^ U64BIG(*(u64*)(c + *mlen + 8)))) != 0) { + *mlen = 0; + return -1; + } + + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/encrypt.c b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/encrypt.c new file mode 100644 index 0000000..5961c60 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/encrypt.c @@ -0,0 +1,21 @@ +#include "core.h" + +int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen, + const unsigned char* m, unsigned long long mlen, + const unsigned char* ad, unsigned long long adlen, + const unsigned char* nsec, const unsigned char* npub, + const unsigned char* k) { + state s; + (void)nsec; + + // set ciphertext size + *clen = mlen + CRYPTO_ABYTES; + + ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENC); + + // set tag + *(u64*)(c + mlen) = U64BIG(s.x3); + *(u64*)(c + mlen + 8) = U64BIG(s.x4); + + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/endian.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/endian.h new file mode 100644 index 0000000..b4d18f5 --- /dev/null +++ 
b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/endian.h @@ -0,0 +1,29 @@ +#ifndef ENDIAN_H_ +#define ENDIAN_H_ + +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + +// macros for big endian machines +#define U64BIG(x) (x) +#define U32BIG(x) (x) +#define U16BIG(x) (x) + +#elif defined(_MSC_VER) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + +// macros for little endian machines +#define U64BIG(x) \ + ((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) | \ + (((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) | \ + (((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) | \ + (((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56)) +#define U32BIG(x) \ + ((((x)&0x000000FF) << 24) | (((x)&0x0000FF00) << 8) | \ + (((x)&0x00FF0000) >> 8) | (((x)&0xFF000000) >> 24)) +#define U16BIG(x) ((((x)&0x00FF) << 8) | (((x)&0xFF00) >> 8)) + +#else +#error "ascon byte order macros not defined in endian.h" +#endif + +#endif // ENDIAN_H_ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/implementors b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/implementors new file mode 100644 index 0000000..b110c1a --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/implementors @@ -0,0 +1,2 @@ +Christoph Dobraunig +Martin Schläffer diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/permutations.c b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/permutations.c new file mode 100644 index 0000000..9aaf9d1 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/permutations.c @@ -0,0 +1,8 @@ +#include "permutations.h" + +void P(state *p, u8 rounds) { + state s = *p; + u8 i, start = START_CONSTANT(rounds); + for (i = start; i > 0x4a; i -= 0x0f) ROUND(i); + *p = s; +} diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/permutations.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/permutations.h new file mode 100644 index 0000000..7143e82 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/permutations.h @@ -0,0 +1,66 @@ +#ifndef PERMUTATIONS_H_ +#define PERMUTATIONS_H_ + +typedef unsigned char u8; +typedef unsigned long long u64; + +typedef struct { + u64 x0, x1, x2, x3, x4; +} state; + +#define EXT_BYTE64(x, n) ((u8)((u64)(x) >> (8 * (7 - (n))))) +#define INS_BYTE64(x, n) ((u64)(x) << (8 * (7 - (n)))) +#define ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) +#define START_CONSTANT(x) (((0xf - (12 - (x))) << 4) | (12 - (x))) + +#define ROUND(C) \ + do { \ + state t; \ + s.x2 ^= C; \ + s.x0 ^= s.x4; \ + s.x4 ^= s.x3; \ + s.x2 ^= s.x1; \ + t.x0 = s.x0; \ + t.x4 = s.x4; \ + t.x3 = s.x3; \ + t.x1 = s.x1; \ + t.x2 = s.x2; \ + s.x0 = t.x0 ^ ((~t.x1) & t.x2); \ + s.x2 = t.x2 ^ ((~t.x3) & t.x4); \ + s.x4 = t.x4 ^ ((~t.x0) & t.x1); \ + s.x1 = t.x1 ^ ((~t.x2) & t.x3); \ + s.x3 = t.x3 ^ ((~t.x4) & t.x0); \ + s.x1 ^= s.x0; \ + t.x1 = s.x1; \ + s.x1 = ROTR64(s.x1, 39); \ + s.x3 ^= s.x2; \ + t.x2 = s.x2; \ + s.x2 = ROTR64(s.x2, 1); \ + t.x4 = s.x4; \ + t.x2 ^= s.x2; \ + s.x2 = ROTR64(s.x2, 6 - 1); \ + t.x3 = s.x3; \ + t.x1 ^= s.x1; \ + s.x3 = ROTR64(s.x3, 10); \ + s.x0 ^= s.x4; \ + s.x4 = ROTR64(s.x4, 7); \ + t.x3 ^= s.x3; \ + s.x2 ^= t.x2; \ + s.x1 = ROTR64(s.x1, 61 - 39); \ + t.x0 = s.x0; \ + s.x2 = ~s.x2; \ + s.x3 = ROTR64(s.x3, 17 - 10); \ + t.x4 ^= s.x4; \ + s.x4 = ROTR64(s.x4, 41 - 7); \ + s.x3 ^= 
t.x3; \ + s.x1 ^= t.x1; \ + s.x0 = ROTR64(s.x0, 19); \ + s.x4 ^= t.x4; \ + t.x0 ^= s.x0; \ + s.x0 = ROTR64(s.x0, 28 - 19); \ + s.x0 ^= t.x0; \ + } while (0) + +void P(state *p, u8 rounds); + +#endif // PERMUTATIONS_H_ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/api.h b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/api.h new file mode 100644 index 0000000..4b53d6c --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 20 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/core.c b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/core.c new file mode 100644 index 0000000..88bcb45 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/core.c @@ -0,0 +1,67 @@ +#include "core.h" + +void process_data(state* s, unsigned char* out, const unsigned char* in, + unsigned long long len, u8 mode) { + u64 i; + + while (len >= RATE) { + s->x0 ^= U64BIG(*(u64*)in); + if (mode != ASCON_AD) *(u64*)out = U64BIG(s->x0); + if (mode == ASCON_DEC) s->x0 = U64BIG(*((u64*)in)); + P(s, PB_ROUNDS); + in += RATE; + out += RATE; + len -= RATE; + } + + for (i = 0; i < len; ++i, ++out, ++in) { + s->x0 ^= INS_BYTE64(*in, i); + + if (mode != ASCON_AD) *out = EXT_BYTE64(s->x0, i); + if (mode == ASCON_DEC) { + s->x0 &= ~INS_BYTE64(0xff, i); + s->x0 |= INS_BYTE64(*in, i); + } + } + s->x0 ^= INS_BYTE64(0x80, len); +} + +void ascon_core(state* s, unsigned char* out, const unsigned char* in, + unsigned long long tlen, const unsigned char* ad, + unsigned long long adlen, const unsigned char* npub, + const unsigned char* k, u8 mode) { + const u64 K0 = U64BIG(*(u64*)(k + 0)) >> 32; + const u64 K1 = U64BIG(*(u64*)(k + 4)); + const u64 K2 = U64BIG(*(u64*)(k + 12)); + const u64 N0 = U64BIG(*(u64*)npub); + const u64 N1 = U64BIG(*(u64*)(npub + 8)); + + // initialization + s->x0 = IV | K0; + s->x1 = K1; + s->x2 = K2; + s->x3 = N0; + s->x4 = N1; + P(s, PA_ROUNDS); + s->x2 ^= K0; + s->x3 ^= K1; + s->x4 ^= K2; + + // process associated data + if (adlen) { + process_data(s, (void*)0, ad, adlen, ASCON_AD); + P(s, PB_ROUNDS); + } + s->x4 ^= 1; + + // process plaintext/ciphertext + process_data(s, out, in, tlen, mode); + + // finalization + s->x1 ^= K0 << 32 | K1 >> 32; + s->x2 ^= K1 << 32 | K2 >> 32; + s->x3 ^= K2 << 32; + P(s, PA_ROUNDS); + s->x3 ^= K1; + s->x4 ^= K2; +} diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/core.h b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/core.h new file mode 100644 index 0000000..90076c1 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/core.h @@ -0,0 +1,27 @@ +#ifndef CORE_H_ +#define CORE_H_ + +#include "api.h" +#include "endian.h" +#include "permutations.h" + +#define ASCON_AD 0 +#define ASCON_ENC 1 +#define ASCON_DEC 2 + +#define RATE (64 / 8) +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define IV \ + ((u64)(8 * (CRYPTO_KEYBYTES)) << 56 | (u64)(8 * (RATE)) << 48 | \ + (u64)(PA_ROUNDS) << 40 | (u64)(PB_ROUNDS) << 32) + +void process_data(state* s, unsigned char* out, const unsigned char* in, + unsigned long long len, u8 mode); + +void ascon_core(state* s, unsigned char* out, const unsigned char* in, + unsigned long long tlen, const unsigned char* ad, + unsigned long long adlen, const unsigned char* npub, + const unsigned char* k, u8 mode); + +#endif 
// CORE_H_ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/decrypt.c b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/decrypt.c new file mode 100644 index 0000000..0cde81e --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/decrypt.c @@ -0,0 +1,29 @@ +#include "core.h" + +int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen, + unsigned char* nsec, const unsigned char* c, + unsigned long long clen, const unsigned char* ad, + unsigned long long adlen, const unsigned char* npub, + const unsigned char* k) { + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + + state s; + (void)nsec; + + // set plaintext size + *mlen = clen - CRYPTO_ABYTES; + + ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DEC); + + // verify tag (should be constant time, check compiler output) + if (((s.x3 ^ U64BIG(*(u64*)(c + *mlen))) | + (s.x4 ^ U64BIG(*(u64*)(c + *mlen + 8)))) != 0) { + *mlen = 0; + return -1; + } + + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/encrypt.c b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/encrypt.c new file mode 100644 index 0000000..5961c60 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/encrypt.c @@ -0,0 +1,21 @@ +#include "core.h" + +int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen, + const unsigned char* m, unsigned long long mlen, + const unsigned char* ad, unsigned long long adlen, + const unsigned char* nsec, const unsigned char* npub, + const unsigned char* k) { + state s; + (void)nsec; + + // set ciphertext size + *clen = mlen + CRYPTO_ABYTES; + + ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENC); + + // set tag + *(u64*)(c + mlen) = U64BIG(s.x3); + *(u64*)(c + mlen + 8) = U64BIG(s.x4); + + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/endian.h b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/endian.h new file mode 100644 index 0000000..b4d18f5 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/endian.h @@ -0,0 +1,29 @@ +#ifndef ENDIAN_H_ +#define ENDIAN_H_ + +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + +// macros for big endian machines +#define U64BIG(x) (x) +#define U32BIG(x) (x) +#define U16BIG(x) (x) + +#elif defined(_MSC_VER) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + +// macros for little endian machines +#define U64BIG(x) \ + ((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) | \ + (((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) | \ + (((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) | \ + (((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56)) +#define U32BIG(x) \ + ((((x)&0x000000FF) << 24) | (((x)&0x0000FF00) << 8) | \ + (((x)&0x00FF0000) >> 8) | (((x)&0xFF000000) >> 24)) +#define U16BIG(x) ((((x)&0x00FF) << 8) | (((x)&0xFF00) >> 8)) + +#else +#error "ascon byte order macros not defined in endian.h" +#endif + +#endif // ENDIAN_H_ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/implementors b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/implementors new file mode 100644 index 0000000..b110c1a --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/implementors @@ -0,0 +1,2 @@ +Christoph Dobraunig +Martin Schläffer diff --git 
a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/permutations.c b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/permutations.c new file mode 100644 index 0000000..9aaf9d1 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/permutations.c @@ -0,0 +1,8 @@ +#include "permutations.h" + +void P(state *p, u8 rounds) { + state s = *p; + u8 i, start = START_CONSTANT(rounds); + for (i = start; i > 0x4a; i -= 0x0f) ROUND(i); + *p = s; +} diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/permutations.h b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/permutations.h new file mode 100644 index 0000000..7143e82 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/permutations.h @@ -0,0 +1,66 @@ +#ifndef PERMUTATIONS_H_ +#define PERMUTATIONS_H_ + +typedef unsigned char u8; +typedef unsigned long long u64; + +typedef struct { + u64 x0, x1, x2, x3, x4; +} state; + +#define EXT_BYTE64(x, n) ((u8)((u64)(x) >> (8 * (7 - (n))))) +#define INS_BYTE64(x, n) ((u64)(x) << (8 * (7 - (n)))) +#define ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) +#define START_CONSTANT(x) (((0xf - (12 - (x))) << 4) | (12 - (x))) + +#define ROUND(C) \ + do { \ + state t; \ + s.x2 ^= C; \ + s.x0 ^= s.x4; \ + s.x4 ^= s.x3; \ + s.x2 ^= s.x1; \ + t.x0 = s.x0; \ + t.x4 = s.x4; \ + t.x3 = s.x3; \ + t.x1 = s.x1; \ + t.x2 = s.x2; \ + s.x0 = t.x0 ^ ((~t.x1) & t.x2); \ + s.x2 = t.x2 ^ ((~t.x3) & t.x4); \ + s.x4 = t.x4 ^ ((~t.x0) & t.x1); \ + s.x1 = t.x1 ^ ((~t.x2) & t.x3); \ + s.x3 = t.x3 ^ ((~t.x4) & t.x0); \ + s.x1 ^= s.x0; \ + t.x1 = s.x1; \ + s.x1 = ROTR64(s.x1, 39); \ + s.x3 ^= s.x2; \ + t.x2 = s.x2; \ + s.x2 = ROTR64(s.x2, 1); \ + t.x4 = s.x4; \ + t.x2 ^= s.x2; \ + s.x2 = ROTR64(s.x2, 6 - 1); \ + t.x3 = s.x3; \ + t.x1 ^= s.x1; \ + s.x3 = ROTR64(s.x3, 10); \ + s.x0 ^= s.x4; \ + s.x4 = ROTR64(s.x4, 7); \ + t.x3 ^= s.x3; \ + s.x2 ^= t.x2; \ + s.x1 = ROTR64(s.x1, 61 - 39); \ + t.x0 = s.x0; \ + s.x2 = ~s.x2; \ + s.x3 = ROTR64(s.x3, 17 - 10); \ + t.x4 ^= s.x4; \ + s.x4 = ROTR64(s.x4, 41 - 7); \ + s.x3 ^= t.x3; \ + s.x1 ^= t.x1; \ + s.x0 = ROTR64(s.x0, 19); \ + s.x4 ^= t.x4; \ + t.x0 ^= s.x0; \ + s.x0 = ROTR64(s.x0, 28 - 19); \ + s.x0 ^= t.x0; \ + } while (0) + +void P(state *p, u8 rounds); + +#endif // PERMUTATIONS_H_ diff --git a/knot/Implementations/crypto_aead/knot128v1/armcortexm_3/api.h b/knot/Implementations/crypto_aead/knot128v1/armcortexm_3/api.h new file mode 100644 index 0000000..6e591c3 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v1/armcortexm_3/api.h @@ -0,0 +1,9 @@ + +#define CRYPTO_KEYBYTES 16 // +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 + + + diff --git a/knot/Implementations/crypto_aead/knot128v1/armcortexm_3/auxFormat.h b/knot/Implementations/crypto_aead/knot128v1/armcortexm_3/auxFormat.h new file mode 100644 index 0000000..50bc7a8 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v1/armcortexm_3/auxFormat.h @@ -0,0 +1,68 @@ +#include"crypto_aead.h" +#include"api.h" +#include +#define U32BIG(x) (x) + + +#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) +#define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n)))) + + +#define sbox(a, b, c, d, e, f, g, h) \ +{ \ + t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; e = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \ +} + +typedef unsigned char u8; +typedef unsigned int u32; +typedef unsigned long long u64; +#define 
packFormat(out,in) {\ +t1 = U32BIG(((u32*)in)[0]); \ +t2 = U32BIG(((u32*)in)[1]); \ +t3 = (t1 ^ (t1 >> 1)) & 0x22222222, t1 ^= t3 ^ (t3 << 1); \ +t3 = (t1 ^ (t1 >> 2)) & 0x0C0C0C0C, t1 ^= t3 ^ (t3 << 2); \ +t3 = (t1 ^ (t1 >> 4)) & 0x00F000F0, t1 ^= t3 ^ (t3 << 4); \ +t3 = (t1 ^ (t1 >> 8)) & 0x0000FF00, t1 ^= t3 ^ (t3 << 8); \ +t5 = (t2 ^ (t2 >> 1)) & 0x22222222, t2 ^= t5 ^ (t5 << 1); \ +t5 = (t2 ^ (t2 >> 2)) & 0x0C0C0C0C, t2 ^= t5 ^ (t5 << 2); \ +t5 = (t2 ^ (t2 >> 4)) & 0x00F000F0, t2 ^= t5 ^ (t5 << 4); \ +t5 = (t2 ^ (t2 >> 8)) & 0x0000FF00, t2 ^= t5 ^ (t5 << 8); \ +out[0] = (t2 & 0xFFFF0000) | (t1 >> 16); \ +out[1] = (t2 << 16) | (t1 & 0x0000FFFF); \ +} +#define unpackFormat(out, in) {\ + t2 = (in[0] & 0xFFFF0000) | (in[1] >> 16); \ + t1 = (in[1] & 0x0000FFFF) | (in[0] << 16); \ + t3 = (t1 ^ (t1 >> 8)) & 0x0000FF00, t1 ^= t3 ^ (t3 << 8); \ + t3 = (t1 ^ (t1 >> 4)) & 0x00F000F0, t1 ^= t3 ^ (t3 << 4); \ + t3 = (t1 ^ (t1 >> 2)) & 0x0C0C0C0C, t1 ^= t3 ^ (t3 << 2); \ + t3 = (t1 ^ (t1 >> 1)) & 0x22222222, t1 ^= t3 ^ (t3 << 1); \ + t5 = (t2 ^ (t2 >> 8)) & 0x0000FF00, t2 ^= t5 ^ (t5 << 8); \ + t5 = (t2 ^ (t2 >> 4)) & 0x00F000F0, t2 ^= t5 ^ (t5 << 4); \ + t5 = (t2 ^ (t2 >> 2)) & 0x0C0C0C0C, t2 ^= t5 ^ (t5 << 2); \ + t5 = (t2 ^ (t2 >> 1)) & 0x22222222, t2 ^= t5 ^ (t5 << 1); \ + *((u64*)out) = ((u64)t2 << 32 | t1); \ +} +#define getU32Format(out, in) {\ + t1, t2 = U32BIG(((u32*)in)[0]); \ + t1 = (t2 ^ (t2 >> 1)) & 0x22222222, t2 ^= t1 ^ (t1 << 1); \ + t1 = (t2 ^ (t2 >> 2)) & 0x0C0C0C0C, t2 ^= t1 ^ (t1 << 2); \ + t1 = (t2 ^ (t2 >> 4)) & 0x00F000F0, t2 ^= t1 ^ (t1 << 4); \ + t1 = (t2 ^ (t2 >> 8)) & 0x0000FF00, t2 ^= t1 ^ (t1 << 8); \ + *out = t2; \ +} +#define ROUND256( constant6Format,lunNum) {\ + s[0] ^= constant6Format[lunNum]>> 4;\ + s[1] ^= constant6Format[lunNum]& 0x0f;\ + sbox(s[0], s[2], s[4], s[6], s_temp[0], s_temp[2], s_temp[4], s_temp[6]);\ + sbox(s[1], s[3], s[5], s[7], s_temp[1], s_temp[3], s_temp[5], s_temp[7]);\ + s[0] = s_temp[0];\ + s[1] = s_temp[1];\ + s[2] = s_temp[3];\ + s[3] = LOTR32(s_temp[2], 1);\ + s[4] = LOTR32(s_temp[4], 4);\ + s[5] = LOTR32(s_temp[5], 4);\ + s[6] = LOTR32(s_temp[7], 12);\ + s[7] = LOTR32(s_temp[6], 13);\ +} + diff --git a/knot/Implementations/crypto_aead/knot128v1/armcortexm_3/crypto_aead.h b/knot/Implementations/crypto_aead/knot128v1/armcortexm_3/crypto_aead.h new file mode 100644 index 0000000..862d176 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v1/armcortexm_3/crypto_aead.h @@ -0,0 +1,18 @@ + +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k +); + +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k +); diff --git a/knot/Implementations/crypto_aead/knot128v1/armcortexm_3/encrypt.c b/knot/Implementations/crypto_aead/knot128v1/armcortexm_3/encrypt.c new file mode 100644 index 0000000..91f9f42 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v1/armcortexm_3/encrypt.c @@ -0,0 +1,246 @@ + +#include"auxFormat.h" + +#define RATE (64 / 8) + +#define PR0_ROUNDS 52 +#define PR_ROUNDS 28 +#define PRF_ROUNDS 32 +unsigned char constant6Format[63] = { + /*constant6_aead_128v1:*/ +0x1, +0x10, +0x2, +0x20, +0x4, +0x41, +0x11, +0x12, +0x22, 
+0x24, +0x45, +0x50, +0x3, +0x30, +0x6, +0x61, +0x15, +0x53, +0x33, +0x36, +0x67, +0x74, +0x46, +0x60, +0x5, +0x51, +0x13, +0x32, +0x26, +0x65, +0x54, +0x42, +0x21, +0x14, +0x43, +0x31, +0x16, +0x63, +0x35, +0x57, +0x72, +0x27, +0x75, +0x56, +0x62, +0x25, +0x55, +0x52, +0x23, +0x34, +0x47, +0x70, +0x7, +0x71, +0x17, +0x73, +0x37, +0x77, +0x76, +0x66, +0x64, +0x44, +0x40, + +}; + + + +int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const unsigned char *k) { + unsigned int i, j; + u32 s[8] = { 0 }; + u32 dataFormat[2] = { 0 }; + u8 tempData[8]; + u32 s_temp[8] = { 0 }; + u32 t1, t2, t3, t5, t6, t8, t9, t11; + *clen = mlen + CRYPTO_ABYTES; + //initialization + packFormat(s, npub); + packFormat((s + 2), (npub + 8)); + packFormat((s + 4), k); + packFormat((s + 6), (k + 8)); + for (i = 0; i < PR0_ROUNDS; i++) { + ROUND256(constant6Format,i); + } + // process associated data + if (adlen) { + //rlen = adlen; + while (adlen >= RATE) { + packFormat(dataFormat, ad); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + for (i = 0; i < PR_ROUNDS; i++) { + ROUND256(constant6Format, i); + } + adlen -= RATE; + ad += RATE; + } + memset(tempData, 0, sizeof(tempData)); +memcpy(tempData, ad, adlen * sizeof(unsigned char)); +tempData[adlen] = 0x01; + packFormat(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + for (i = 0; i < PR_ROUNDS; i++) { + ROUND256(constant6Format, i); + } + } + s[6] ^= 0x80000000; + if (mlen) { + while (mlen >= RATE) { + packFormat(dataFormat, m); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + unpackFormat(c, s); + for (i = 0; i < PR_ROUNDS; i++) { + ROUND256(constant6Format, i); + } + mlen -= RATE; + m += RATE; + c += RATE; + } + memset(tempData, 0, sizeof(tempData)); +memcpy(tempData, m, mlen * sizeof(unsigned char)); + +tempData[mlen]= 0x01; + packFormat(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + unpackFormat(tempData, s); + memcpy(c, tempData, mlen * sizeof(unsigned char)); + c +=mlen; + } + // finalization + for (i = 0; i < PRF_ROUNDS; i++) { + ROUND256(constant6Format, i); + } + // return tag + unpackFormat(tempData, s); + memcpy(c, tempData, sizeof(tempData)); + unpackFormat(tempData,(s + 2)); + memcpy(c+8, tempData, sizeof(tempData)); +// unpackFormat((c), s); +// unpackFormat((c+8),(s + 2)); + return 0; +} + +int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k) { + u8 i, j; + // initialization + //256/32=8 + u32 s[8] = { 0 }; + u32 dataFormat[4] = { 0 }; + u32 dataFormat_1[2] = { 0 }; + u8 tempU8[32] = { 0 }; + u8 tempData[8]; + u32 s_temp[8] = { 0 }; + u32 t1, t2, t3, t5, t6, t8, t9, t11; + *mlen = clen - CRYPTO_ABYTES; + if (clen < CRYPTO_ABYTES) + return -1; + //initialization + packFormat(s, npub); + packFormat((s + 2), (npub + 8)); + packFormat((s + 4), k); + packFormat((s + 6), (k + 8)); + for (i = 0; i < PR0_ROUNDS; i++) { + ROUND256(constant6Format, i); + } + // process associated data + if (adlen) { + while (adlen >= RATE) { + packFormat(dataFormat, ad); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + for (i = 0; i < PR_ROUNDS; i++) { + ROUND256(constant6Format, i); + } + adlen -= RATE; + ad += RATE; + } + memset(tempData, 0, 
sizeof(tempData)); + memcpy(tempData, ad, adlen * sizeof(unsigned char)); + tempData[adlen] = 0x01; + packFormat(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + for (i = 0; i < PR_ROUNDS; i++) { + ROUND256(constant6Format, i); + } + } + s[6] ^= 0x80000000; + // process c + clen = clen - CRYPTO_KEYBYTES; + if (clen) { + while (clen >= RATE) { + packFormat(dataFormat, c); + dataFormat_1[0] = s[0] ^ dataFormat[0]; + dataFormat_1[1] = s[1] ^ dataFormat[1]; + unpackFormat(m, dataFormat_1); + s[0] = dataFormat[0]; + s[1] = dataFormat[1]; + for (i = 0; i < PR_ROUNDS; i++) { + ROUND256(constant6Format, i); + } + clen -= RATE; + m += RATE; + c += RATE; + } + unpackFormat(tempU8, s); + for (i = 0; i < clen; ++i, ++m, ++c) + { + *m = tempU8[i]^ *c; + tempU8[i] = *c; + } + tempU8[i] ^= 0x01; + packFormat(s, tempU8); + } + // finalization + for (i = 0; i < PRF_ROUNDS; i++) { + ROUND256(constant6Format, i); + } + // return tag + packFormat(dataFormat, c); + packFormat((dataFormat + 2), (c +8)); + if (dataFormat[0] != s[0] || dataFormat[1] != s[1] || dataFormat[2] != s[2] || dataFormat[3] != s[3]) { + return -1; + } + return 0; +} diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_3/api.h b/knot/Implementations/crypto_aead/knot128v2/armcortexm_3/api.h new file mode 100644 index 0000000..084356b --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_3/api.h @@ -0,0 +1,8 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 + + + diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_3/auxFormat.h b/knot/Implementations/crypto_aead/knot128v2/armcortexm_3/auxFormat.h new file mode 100644 index 0000000..45ad28d --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_3/auxFormat.h @@ -0,0 +1,142 @@ +//#include +#include"crypto_aead.h" +#include"api.h" +#include +#include +#include +#define U32BIG(x) (x) + +typedef unsigned char u8; +typedef unsigned int u32; +typedef unsigned long long u64; + +#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) +#define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n)))) + + +#define puckU32ToThree(x){\ +x &= 0x92492492;\ +x = (x | (x << 2)) & 0xc30c30c3;\ +x = (x | (x << 4)) & 0xf00f00f0;\ +x = (x | (x << 8)) & 0xff0000ff;\ +x = (x | (x << 16)) & 0xfff00000;\ +} +#define unpuckU32ToThree(x){\ +x &= 0xfff00000;\ +x = (x | (x >> 16)) & 0xff0000ff;\ +x = (x | (x >> 8)) & 0xf00f00f0;\ +x = (x | (x >> 4)) & 0xc30c30c3;\ +x = (x | (x >> 2)) & 0x92492492;\ +} + +#define packU32FormatToThreePacket( out, in) {\ +t2 = U32BIG(((u32*)in)[0]); \ +t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6; \ +t2 = t2 << 2; \ +temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; \ +puckU32ToThree(temp2[0]); \ +puckU32ToThree(temp2[1]); \ +puckU32ToThree(temp2[2]); \ +out[0] = (temp2[0] >> 22); \ +out[1] = (((u32)t2_64) << 10) | (temp2[1] >> 22); \ +out[2] =(((u32)t2_65) << 10) | (temp2[2] >> 22); \ +} + +#define packU96FormatToThreePacket(out, in) {\ +t9 = U32BIG(((u32*)in)[2]); \ +t1 = U32BIG(((u32*)in)[1]); \ +t2 = U32BIG(((u32*)in)[0]); \ +t1_32 = (in[7] & 0x80) >> 7, t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6; \ +t1 = t1 << 1; \ +t2 = t2 << 2; \ +temp0[0] = t9; temp0[1] = t9 << 1; temp0[2] = t9 << 2; \ +puckU32ToThree(temp0[0]); \ +puckU32ToThree(temp0[1]); \ +puckU32ToThree(temp0[2]); \ +temp1[0] = t1; temp1[1] = t1 << 1; temp1[2] = t1 << 2; \ +puckU32ToThree(temp1[0]); \ +puckU32ToThree(temp1[1]); \ 
+puckU32ToThree(temp1[2]); \ +temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; \ +puckU32ToThree(temp2[0]); \ +puckU32ToThree(temp2[1]); \ +puckU32ToThree(temp2[2]); \ +out[0] = (temp0[0]) | (temp1[0] >> 11) | (temp2[0] >> 22); \ +out[1] = (temp0[1]) | (temp1[1] >> 11) | (((u32)t2_64) << 10) | (temp2[1] >> 22); \ +out[2] = (temp0[2]) | (((u32)t1_32) << 21) | (temp1[2] >> 11) | (((u32)t2_65) << 10) | (temp2[2] >> 22); \ +} + +#define unpackU32FormatToThreePacket(out, in) {\ +temp2[0] = (in[0] & 0x000003ff) << 22; \ +t2_64 = ((in[1] & 0x00000400) << 21); \ +temp2[1] = (in[1] & 0x000003ff) << 22; \ +t2_65 = ((in[2] & 0x00000400) << 20); \ +temp2[2] = (in[2] & 0x000003ff) << 22; \ +unpuckU32ToThree(temp2[0]); \ +unpuckU32ToThree(temp2[1]); \ +unpuckU32ToThree(temp2[2]); \ +t2 = t2_65 | t2_64 | ((temp2[0] | temp2[1] >> 1 | temp2[2] >> 2) >> 2); \ +*(u32*)(out) = U32BIG(t2); \ +} + +#define unpackU96FormatToThreePacket( out, in) {\ +temp0[0] = in[0] & 0xffe00000; \ +temp1[0] = (in[0] & 0x001ffc00) << 11; \ +temp2[0] = (in[0] & 0x000003ff) << 22; \ +temp0[1] = in[1] & 0xffe00000; \ +temp1[1] = (in[1] & 0x001ff800) << 11; \ +t2_64 = ((in[1] & 0x00000400) << 21); \ +temp2[1] = (in[1] & 0x000003ff) << 22; \ +temp0[2] = in[2] & 0xffc00000; \ +t1_32 = ((in[2] & 0x00200000) << 10); \ +temp1[2] = (in[2] & 0x001ff800) << 11; \ +t2_65 = ((in[2] & 0x00000400) << 20); \ +temp2[2] = (in[2] & 0x000003ff) << 22; \ +unpuckU32ToThree(temp0[0]); \ +unpuckU32ToThree(temp0[1]); \ +unpuckU32ToThree(temp0[2]); \ +t9 = temp0[0] | temp0[1] >> 1 | temp0[2] >> 2; \ +unpuckU32ToThree(temp1[0]); \ +unpuckU32ToThree(temp1[1]); \ +unpuckU32ToThree(temp1[2]); \ +t1 = t1_32 | ((temp1[0] | temp1[1] >> 1 | temp1[2] >> 2) >> 1); \ +unpuckU32ToThree(temp2[0]); \ +unpuckU32ToThree(temp2[1]); \ +unpuckU32ToThree(temp2[2]); \ +t2 = t2_65 | t2_64 | ((temp2[0] | temp2[1] >> 1 | temp2[2] >> 2) >> 2); \ +*(u32*)(out) = U32BIG(t2); \ +*(u32*)(out + 4) = U32BIG(t1); \ +*(u32*)(out + 8) = U32BIG(t9); \ +} + +#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) +#define sbox(a, b, c, d, e, f, g, h) \ +{ \ + t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; e = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \ +} + + +#define U96_BIT_LOTR32_1(t0,t1,t2,t3,t4,t5){\ +t3= t1;\ +t4 = t2;\ +t5 = LOTR32(t0, 1); \ +} +#define U96_BIT_LOTR32_8(t0,t1,t2,t3,t4,t5){\ +t3= LOTR32(t2, 2);\ +t4 =LOTR32(t0, 3);\ +t5 = LOTR32(t1, 3); \ +} + +#define U96_BIT_LOTR32_55(t0,t1,t2,t3,t4,t5){\ +t3= LOTR32(t1, 18); \ +t4 = LOTR32(t2, 18);\ +t5 = LOTR32(t0, 19); \ +} +/* +s0 s1 s2 +s3 s4 s5 +s6 s7 s8 +s9 s10 s11 +*/ + + diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_3/crypto_aead.h b/knot/Implementations/crypto_aead/knot128v2/armcortexm_3/crypto_aead.h new file mode 100644 index 0000000..cdfdf19 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_3/crypto_aead.h @@ -0,0 +1,17 @@ +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k +); + +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k +); diff --git a/knot/Implementations/crypto_aead/knot128v2/armcortexm_3/encrypt.c 
b/knot/Implementations/crypto_aead/knot128v2/armcortexm_3/encrypt.c new file mode 100644 index 0000000..1245313 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot128v2/armcortexm_3/encrypt.c @@ -0,0 +1,263 @@ + +#include"auxFormat.h" + + + +#define aead_RATE (192 / 8) +#define PR0_ROUNDS 76 +#define PR_ROUNDS 28 +#define PRF_ROUNDS 32 + +unsigned char constant7Format[127] = { + /*constant7Format[127]:*/ + 0x01,0x08,0x40,0x02,0x10,0x80,0x05,0x09,0x48,0x42,0x12,0x90, + 0x85,0x0c,0x41,0x0a,0x50,0x82,0x15,0x89,0x4d,0x4b,0x5a,0xd2, + 0x97,0x9c,0xc4,0x06,0x11,0x88,0x45,0x0b,0x58,0xc2,0x17,0x99, + 0xcd,0x4e,0x53,0x9a,0xd5,0x8e,0x54,0x83,0x1d,0xc9,0x4f,0x5b, + 0xda,0xd7,0x9e,0xd4,0x86,0x14,0x81,0x0d,0x49,0x4a,0x52,0x92, + 0x95,0x8c,0x44,0x03,0x18,0xc0,0x07,0x19,0xc8,0x47,0x1b,0xd8, + 0xc7,0x1e,0xd1,0x8f,0x5c,0xc3,0x1f,0xd9,0xcf,0x5e,0xd3,0x9f, + 0xdc,0xc6,0x16,0x91,0x8d,0x4c,0x43,0x1a,0xd0,0x87,0x1c,0xc1, + 0x0f,0x59,0xca,0x57,0x9b,0xdd,0xce,0x56,0x93,0x9d,0xcc,0x46, + 0x13,0x98,0xc5,0x0e,0x51,0x8a,0x55,0x8b,0x5d,0xcb,0x5f,0xdb, + 0xdf,0xde,0xd6,0x96,0x94,0x84,0x04, }; +#define ROUND384(lunNum) {\ +s[0] ^= (constant7Format[lunNum] >> 6) & 0x3;\ +s[1] ^= (constant7Format[lunNum] >> 3) & 0x7;\ +s[2] ^= constant7Format[lunNum] & 0x7;\ +sbox(s[0], s[3], s[6], s[9] , s_temp[0], s_temp[3], s_temp[6], s_temp[9]);\ +sbox(s[1], s[4], s[7], s[10], s_temp[1], s_temp[4], s_temp[7], s_temp[10]);\ +sbox(s[2], s[5], s[8], s[11], s_temp[2], s_temp[5], s_temp[8], s_temp[11]);\ +s[0] = s_temp[0], s[1] = s_temp[1], s[2] = s_temp[2];\ +U96_BIT_LOTR32_1(s_temp[3], s_temp [4], s_temp[ 5], s[3], s[4], s[5]);\ +U96_BIT_LOTR32_8(s_temp[6], s_temp [7], s_temp[ 8], s[6], s[7], s[8]);\ +U96_BIT_LOTR32_55(s_temp[9], s_temp[10], s_temp[11], s[9], s[10], s[11]);\ +} +int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const unsigned char *k) { + u8 i; + u32 s[12] = { 0 }; + u8 tempData[24] = { 0 }; + u32 dataFormat[6] = { 0 }; + u32 s_temp[12] = { 0 }; + u32 t1, t2, t3, t5, t6, t8, t9, t11; + u32 t1_32, t2_64, t2_65; + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + + *clen = mlen + CRYPTO_ABYTES; + // initialization + packU96FormatToThreePacket(s, npub); + memcpy(tempData, npub+12, sizeof(unsigned char)*4); + memcpy(tempData+4, k, sizeof(unsigned char) * 16); + packU96FormatToThreePacket((s + 3), tempData); + packU96FormatToThreePacket((s + 6), (tempData+12)); + + s[9] = 0x80000000; + for (i = 0; i < PR0_ROUNDS; i++) { + ROUND384(i); + } + // process associated data + if (adlen) { + // rlen = adlen; + while (adlen >= aead_RATE) { + packU96FormatToThreePacket(dataFormat, ad); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + packU96FormatToThreePacket((dataFormat+3), (ad+12)); + s[3] ^= dataFormat[3]; + s[4] ^= dataFormat[4]; + s[5] ^= dataFormat[5]; + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + adlen -= aead_RATE; + ad += aead_RATE; + } + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, ad, adlen * sizeof(unsigned char)); + tempData[adlen] = 0x01; + packU96FormatToThreePacket(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + packU96FormatToThreePacket((dataFormat + 3), (tempData + 12)); + s[3] ^= dataFormat[3]; + s[4] ^= dataFormat[4]; + s[5] ^= dataFormat[5]; + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + } + s[9] 
^= 0x80000000; + if (mlen) { + //rlen = mlen; + while (mlen >= aead_RATE) { + packU96FormatToThreePacket(dataFormat, m); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + packU96FormatToThreePacket((dataFormat + 3), (m + 12)); + s[3] ^= dataFormat[3]; + s[4] ^= dataFormat[4]; + s[5] ^= dataFormat[5]; + unpackU96FormatToThreePacket(c, s); + unpackU96FormatToThreePacket((c+12), (s+3)); + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + mlen -= aead_RATE; + m += aead_RATE; + c += aead_RATE; + } + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, m, mlen * sizeof(unsigned char)); + tempData[mlen]= 0x01; + packU96FormatToThreePacket(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + packU96FormatToThreePacket((dataFormat + 3), (tempData + 12)); + s[3] ^= dataFormat[3]; + s[4] ^= dataFormat[4]; + s[5] ^= dataFormat[5]; + unpackU96FormatToThreePacket(tempData, s); + unpackU96FormatToThreePacket((tempData+12), (s+3)); + memcpy(c, tempData, mlen * sizeof(unsigned char)); + c += mlen; + } + // finalization + for (i = 0; i < PRF_ROUNDS; i++) { + ROUND384(i); + } + // return tag + unpackU96FormatToThreePacket(c, s); + unpackU96FormatToThreePacket(tempData, (s + 3)); + + memcpy(c+12, tempData, sizeof(unsigned char) * 4); + return 0; +} + +int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k) { + + u8 i, j; + u32 s[12] = { 0 }; + u32 s_temp[12] = { 0 }; + u32 dataFormat[12] = { 0 }; + u32 dataFormat_1[12] = { 0 }; + u8 tempData[24] = { 0 }; + u8 tempU8[24] = { 0 }; + u32 t1, t2, t3, t5, t6, t8, t9, t11; + u32 t1_32, t2_64, t2_65; + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; *mlen = clen - CRYPTO_ABYTES; + if (clen < CRYPTO_ABYTES) + return -1; + // initialization + packU96FormatToThreePacket(s, npub); + memcpy(tempData, npub + 12, sizeof(unsigned char) * 4); + memcpy(tempData + 4, k, sizeof(unsigned char) * 16); + packU96FormatToThreePacket((s + 3), tempData); + packU96FormatToThreePacket((s + 6), (tempData + 12)); + + s[9] = 0x80000000; + for (i = 0; i < PR0_ROUNDS; i++) { + ROUND384(i); + } + // process associated data + if (adlen) { + // rlen = adlen; + while (adlen >= aead_RATE) { + packU96FormatToThreePacket(dataFormat, ad); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + packU96FormatToThreePacket((dataFormat + 3), (ad + 12)); + s[3] ^= dataFormat[3]; + s[4] ^= dataFormat[4]; + s[5] ^= dataFormat[5]; + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + adlen -= aead_RATE; + ad += aead_RATE; + } + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, ad, adlen * sizeof(unsigned char)); + tempData[adlen] = 0x01; + packU96FormatToThreePacket(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + packU96FormatToThreePacket((dataFormat + 3), (tempData + 12)); + s[3] ^= dataFormat[3]; + s[4] ^= dataFormat[4]; + s[5] ^= dataFormat[5]; + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + } + s[9] ^= 0x80000000; + /////////// + clen -= CRYPTO_ABYTES; + if (clen) { + while (clen >= aead_RATE) { + packU96FormatToThreePacket(dataFormat, c); + dataFormat_1[0] = s[0] ^ dataFormat[0]; + dataFormat_1[1] = s[1] ^ dataFormat[1]; + dataFormat_1[2] = s[2] ^ dataFormat[2]; + packU96FormatToThreePacket((dataFormat+3), (c+12)); + 
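/*
 * Decryption follows the usual duplex pattern: the rate part of the state is
 * the keystream, the plaintext is rate ^ ciphertext, and the rate is then
 * overwritten with the ciphertext so the decryptor's state stays in sync with
 * the encryptor's. In the packed representation used here, per share i (0..5):
 *   dataFormat_1[i] = s[i] ^ dataFormat[i];   then   s[i] = dataFormat[i];
 */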
dataFormat_1[3] = s[3] ^ dataFormat[3]; + dataFormat_1[4] = s[4] ^ dataFormat[4]; + dataFormat_1[5] = s[5] ^ dataFormat[5]; + unpackU96FormatToThreePacket(m, dataFormat_1); + unpackU96FormatToThreePacket((m + 12), (dataFormat_1 + 3)); + s[0] = dataFormat[0]; + s[1] = dataFormat[1]; + s[2] = dataFormat[2]; + s[3] = dataFormat[3]; + s[4] = dataFormat[4]; + s[5] = dataFormat[5]; + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + clen -= aead_RATE; + m += aead_RATE; + c += aead_RATE; + } + unpackU96FormatToThreePacket(tempU8, s); + unpackU96FormatToThreePacket((tempU8+12), (s+3)); + for (i = 0; i < clen; ++i, ++m, ++c) + { + *m = tempU8[i] ^ *c; + tempU8[i] = *c; + } + tempU8[i] ^= 0x01; + packU96FormatToThreePacket(s, tempU8); + packU96FormatToThreePacket((s + 3), (tempU8 + 12)); + } + // finalization + for (i = 0; i < PRF_ROUNDS; i++) { + ROUND384(i); + } + // return tag + + unpackU96FormatToThreePacket(tempU8, s); + unpackU96FormatToThreePacket((tempU8+12), (s+3)); + if (U32BIG(((u32*)tempU8)[0]) != U32BIG(((u32*)c)[0]) || + U32BIG(((u32*)tempU8)[1]) != U32BIG(((u32*)c)[1]) || + U32BIG(((u32*)tempU8)[2]) != U32BIG(((u32*)c)[2]) || + U32BIG(((u32*)tempU8)[3]) != U32BIG(((u32*)c)[3]) ){ + return -1; + } + return 0; +} diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_3/api.h b/knot/Implementations/crypto_aead/knot192/armcortexm_3/api.h new file mode 100644 index 0000000..c3cb1d9 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_3/api.h @@ -0,0 +1,6 @@ +#define CRYPTO_KEYBYTES 24 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 24 +#define CRYPTO_ABYTES 24 +#define CRYPTO_NOOVERLAP 1 + diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_3/auxFormat.h b/knot/Implementations/crypto_aead/knot192/armcortexm_3/auxFormat.h new file mode 100644 index 0000000..a617f8e --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_3/auxFormat.h @@ -0,0 +1,128 @@ + +#include"crypto_aead.h" +#include"api.h" +#include +#include +#include +#define U32BIG(x) (x) +#define U16BIG(x) (x) + +typedef unsigned char u8; +typedef unsigned short u16; +typedef unsigned int u32; +typedef unsigned long long u64; + +#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) +#define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n)))) + + +#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) +#define sbox(a, b, c, d, e, f, g, h) \ +{ \ + t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; e = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \ +} + +#define puckU32ToThree(x){\ +x &= 0x92492492;\ +x = (x | (x << 2)) & 0xc30c30c3;\ +x = (x | (x << 4)) & 0xf00f00f0;\ +x = (x | (x << 8)) & 0xff0000ff;\ +x = (x | (x << 16)) & 0xfff00000;\ +} +#define unpuckU32ToThree(x){\ +x &= 0xfff00000;\ +x = (x | (x >> 16)) & 0xff0000ff;\ +x = (x | (x >> 8)) & 0xf00f00f0;\ +x = (x | (x >> 4)) & 0xc30c30c3;\ +x = (x | (x >> 2)) & 0x92492492;\ +} +#define packU48FormatToThreePacket( out, in) {\ +t1 = (u32)U16BIG(*(u16*)(in + 4)); \ +t2 = U32BIG(*(u32*)(in)); \ +t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6; \ +t1 = t1 << 1; \ +t2 = t2 << 2; \ +temp1[0] = t1; temp1[1] = t1 << 1; temp1[2] = t1 << 2; \ +puckU32ToThree(temp1[0]); \ +puckU32ToThree(temp1[1]); \ +puckU32ToThree(temp1[2]); \ +temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; \ +puckU32ToThree(temp2[0]); \ +puckU32ToThree(temp2[1]); \ +puckU32ToThree(temp2[2]); \ +out[0] = (temp1[0] >> 11) | (temp2[0] >> 22); \ +out[1] = (temp1[1] >> 11) | (((u32)t2_64) << 10) | (temp2[1] >> 
22); \ +out[2] = (temp1[2] >> 11) | (((u32)t2_65) << 10) | (temp2[2] >> 22); \ +} + + +#define packU96FormatToThreePacket(out, in) {\ +t9 = U32BIG(((u32*)in)[2]); \ +t1 = U32BIG(((u32*)in)[1]); \ +t2 = U32BIG(((u32*)in)[0]); \ +t1_32 = (in[7] & 0x80) >> 7, t2_64 = (in[3] & 0x80) >> 7, t2_65 = (in[3] & 0x40) >> 6; \ +t1 = t1 << 1; \ +t2 = t2 << 2; \ +temp0[0] = t9; temp0[1] = t9 << 1; temp0[2] = t9 << 2; \ +puckU32ToThree(temp0[0]); \ +puckU32ToThree(temp0[1]); \ +puckU32ToThree(temp0[2]); \ +temp1[0] = t1; temp1[1] = t1 << 1; temp1[2] = t1 << 2; \ +puckU32ToThree(temp1[0]); \ +puckU32ToThree(temp1[1]); \ +puckU32ToThree(temp1[2]); \ +temp2[0] = t2; temp2[1] = t2 << 1; temp2[2] = t2 << 2; \ +puckU32ToThree(temp2[0]); \ +puckU32ToThree(temp2[1]); \ +puckU32ToThree(temp2[2]); \ +out[0] = (temp0[0]) | (temp1[0] >> 11) | (temp2[0] >> 22); \ +out[1] = (temp0[1]) | (temp1[1] >> 11) | (((u32)t2_64) << 10) | (temp2[1] >> 22); \ +out[2] = (temp0[2]) | (((u32)t1_32) << 21) | (temp1[2] >> 11) | (((u32)t2_65) << 10) | (temp2[2] >> 22); \ +} + #define unpackU96FormatToThreePacket( out, in) {\ +temp0[0] = in[0] & 0xffe00000; \ +temp1[0] = (in[0] & 0x001ffc00) << 11; \ +temp2[0] = (in[0] & 0x000003ff) << 22; \ +temp0[1] = in[1] & 0xffe00000; \ +temp1[1] = (in[1] & 0x001ff800) << 11; \ +t2_64 = ((in[1] & 0x00000400) << 21); \ +temp2[1] = (in[1] & 0x000003ff) << 22; \ +temp0[2] = in[2] & 0xffc00000; \ +t1_32 = ((in[2] & 0x00200000) << 10); \ +temp1[2] = (in[2] & 0x001ff800) << 11; \ +t2_65 = ((in[2] & 0x00000400) << 20); \ +temp2[2] = (in[2] & 0x000003ff) << 22; \ +unpuckU32ToThree(temp0[0]); \ +unpuckU32ToThree(temp0[1]); \ +unpuckU32ToThree(temp0[2]); \ +t9 = temp0[0] | temp0[1] >> 1 | temp0[2] >> 2; \ +unpuckU32ToThree(temp1[0]); \ +unpuckU32ToThree(temp1[1]); \ +unpuckU32ToThree(temp1[2]); \ +t1 = t1_32 | ((temp1[0] | temp1[1] >> 1 | temp1[2] >> 2) >> 1); \ +unpuckU32ToThree(temp2[0]); \ +unpuckU32ToThree(temp2[1]); \ +unpuckU32ToThree(temp2[2]); \ +t2 = t2_65 | t2_64 | ((temp2[0] | temp2[1] >> 1 | temp2[2] >> 2) >> 2); \ +*(u32*)(out) = U32BIG(t2); \ +*(u32*)(out + 4) = U32BIG(t1); \ +*(u32*)(out + 8) = U32BIG(t9); \ +} + +#define U96_BIT_LOTR32_1(t0,t1,t2,t3,t4,t5){\ +t3= t1;\ +t4 = t2;\ +t5 = LOTR32(t0, 1); \ +} +#define U96_BIT_LOTR32_8(t0,t1,t2,t3,t4,t5){\ +t3= LOTR32(t2, 2);\ +t4 =LOTR32(t0, 3);\ +t5 = LOTR32(t1, 3); \ +} + +#define U96_BIT_LOTR32_55(t0,t1,t2,t3,t4,t5){\ +t3= LOTR32(t1, 18); \ +t4 = LOTR32(t2, 18);\ +t5 = LOTR32(t0, 19); \ +} + diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_3/crypto_aead.h b/knot/Implementations/crypto_aead/knot192/armcortexm_3/crypto_aead.h new file mode 100644 index 0000000..862d176 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot192/armcortexm_3/crypto_aead.h @@ -0,0 +1,18 @@ + +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k +); + +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k +); diff --git a/knot/Implementations/crypto_aead/knot192/armcortexm_3/encrypt.c b/knot/Implementations/crypto_aead/knot192/armcortexm_3/encrypt.c new file mode 100644 index 0000000..51c7c12 --- /dev/null +++ 
b/knot/Implementations/crypto_aead/knot192/armcortexm_3/encrypt.c @@ -0,0 +1,214 @@ + +#include"auxFormat.h" + +#define aead_RATE (96 / 8) +#define PR0_ROUNDS 76 +#define PR_ROUNDS 40 +#define PRF_ROUNDS 44 +unsigned char constant7Format[127] = { + /*constant7Format[127]:*/ +0x01,0x08,0x40,0x02,0x10,0x80,0x05,0x09,0x48,0x42,0x12,0x90, +0x85,0x0c,0x41,0x0a,0x50,0x82,0x15,0x89,0x4d,0x4b,0x5a,0xd2, +0x97,0x9c,0xc4,0x06,0x11,0x88,0x45,0x0b,0x58,0xc2,0x17,0x99, +0xcd,0x4e,0x53,0x9a,0xd5,0x8e,0x54,0x83,0x1d,0xc9,0x4f,0x5b, +0xda,0xd7,0x9e,0xd4,0x86,0x14,0x81,0x0d,0x49,0x4a,0x52,0x92, +0x95,0x8c,0x44,0x03,0x18,0xc0,0x07,0x19,0xc8,0x47,0x1b,0xd8, +0xc7,0x1e,0xd1,0x8f,0x5c,0xc3,0x1f,0xd9,0xcf,0x5e,0xd3,0x9f, +0xdc,0xc6,0x16,0x91,0x8d,0x4c,0x43,0x1a,0xd0,0x87,0x1c,0xc1, +0x0f,0x59,0xca,0x57,0x9b,0xdd,0xce,0x56,0x93,0x9d,0xcc,0x46, +0x13,0x98,0xc5,0x0e,0x51,0x8a,0x55,0x8b,0x5d,0xcb,0x5f,0xdb, +0xdf,0xde,0xd6,0x96,0x94,0x84,0x04, }; +#define ROUND384(lunNum) {\ +s[0] ^= (constant7Format[lunNum] >> 6) & 0x3;\ +s[1] ^= (constant7Format[lunNum] >> 3) & 0x7;\ +s[2] ^= constant7Format[lunNum] & 0x7;\ +sbox(s[0], s[3], s[6], s[9] , s_temp[0], s_temp[3], s_temp[6], s_temp[9]);\ +sbox(s[1], s[4], s[7], s[10], s_temp[1], s_temp[4], s_temp[7], s_temp[10]);\ +sbox(s[2], s[5], s[8], s[11], s_temp[2], s_temp[5], s_temp[8], s_temp[11]);\ +s[0] = s_temp[0], s[1] = s_temp[1], s[2] = s_temp[2];\ +U96_BIT_LOTR32_1(s_temp[3], s_temp [4], s_temp[ 5], s[3], s[4], s[5]);\ +U96_BIT_LOTR32_8(s_temp[6], s_temp [7], s_temp[ 8], s[6], s[7], s[8]);\ +U96_BIT_LOTR32_55(s_temp[9], s_temp[10], s_temp[11], s[9], s[10], s[11]);\ +} +int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const unsigned char *k) { + + u8 i; + u32 s[12] = { 0 }; + u32 dataFormat[3] = { 0 }; + u8 tempData[12] = { 0 }; + u32 s_temp[12] = { 0 }; + u32 t1, t2, t3, t5, t6, t8, t9, t11; + u32 t1_32, t2_64, t2_65; + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + *clen = mlen + CRYPTO_ABYTES; + // initialization + packU96FormatToThreePacket(s, npub); + packU96FormatToThreePacket((s + 3), (npub + 12)); + packU96FormatToThreePacket((s + 6), k); + packU96FormatToThreePacket((s + 9), (k + 12)); + for (i = 0; i < PR0_ROUNDS; i++) { + ROUND384(i); + } + // process associated data + if (adlen) { + // rlen = adlen; + while (adlen >= aead_RATE) { + packU96FormatToThreePacket(dataFormat, ad); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + adlen -= aead_RATE; + ad += aead_RATE; + } + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, ad, adlen); + tempData[adlen] = 0x01; + packU96FormatToThreePacket(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + } + s[9] ^= 0x80000000; + if (mlen) { + //rlen = mlen; + while (mlen >= aead_RATE) { + packU96FormatToThreePacket(dataFormat, m); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + unpackU96FormatToThreePacket(c, s); + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + mlen -= aead_RATE; + m += aead_RATE; + c += aead_RATE; + } + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, m, mlen); + tempData[mlen] = 0x01; + packU96FormatToThreePacket(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= 
dataFormat[1]; + s[2] ^= dataFormat[2]; + unpackU96FormatToThreePacket(tempData, s); + memcpy(c, tempData, mlen); + c += mlen; + } + // finalization + for (i = 0; i < PRF_ROUNDS; i++) { + ROUND384(i); + } + // return tag + unpackU96FormatToThreePacket(c, s); + unpackU96FormatToThreePacket((c + 12), (s + 3)); + return 0; +} + +int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k) { + u8 i, j; + u32 s[12] = { 0 }; + u32 dataFormat[6] = { 0 }; + u32 dataFormat_1[3] = { 0 }; + u8 tempData[12] = { 0 }; + u8 tempU8[48] = { 0 }; + u32 s_temp[12] = { 0 }; + u32 t1, t2, t3, t5, t6, t8, t9, t11; + u32 t1_32, t2_64, t2_65; + u32 temp0[3] = { 0 }; + u32 temp1[3] = { 0 }; + u32 temp2[3] = { 0 }; + *mlen = clen - CRYPTO_ABYTES; + if (clen < CRYPTO_ABYTES) + return -1; + // initialization + packU96FormatToThreePacket(s, npub); + packU96FormatToThreePacket((s + 3), (npub + 12)); + packU96FormatToThreePacket((s + 6), k); + packU96FormatToThreePacket((s + 9), (k + 12)); + for (i = 0; i < PR0_ROUNDS; i++) { + ROUND384(i); + } + // process associated data + if (adlen) { + // rlen = adlen; + while (adlen >= aead_RATE) { + packU96FormatToThreePacket(dataFormat, ad); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + adlen -= aead_RATE; + ad += aead_RATE; + } + memset(tempData, 0, sizeof(tempData)); + + memcpy(tempData, ad, adlen); + tempData[adlen] = 0x01; + packU96FormatToThreePacket(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + } + s[9] ^= 0x80000000; + clen -= CRYPTO_ABYTES; + if (clen) { + while (clen >= aead_RATE) { + packU96FormatToThreePacket(dataFormat, c); + dataFormat_1[0] = s[0] ^ dataFormat[0]; + dataFormat_1[1] = s[1] ^ dataFormat[1]; + dataFormat_1[2] = s[2] ^ dataFormat[2]; + unpackU96FormatToThreePacket(m, dataFormat_1); + s[0] = dataFormat[0]; + s[1] = dataFormat[1]; + s[2] = dataFormat[2]; + for (i = 0; i < PR_ROUNDS; i++) { + ROUND384(i); + } + clen -= aead_RATE; + m += aead_RATE; + c += aead_RATE; + } + unpackU96FormatToThreePacket(tempU8, s); + for (i = 0; i < clen; ++i, ++m, ++c) + { + *m = tempU8[i] ^ *c; + tempU8[i] = *c; + } + tempU8[i] ^= 0x01; + packU96FormatToThreePacket(s, tempU8); + } + // finalization + for (i = 0; i < PRF_ROUNDS; i++) { + ROUND384(i); + } + // return tag + packU96FormatToThreePacket(dataFormat, c); + packU96FormatToThreePacket((dataFormat + 3), (c + 12)); + if (dataFormat[0] != s[0] || dataFormat[1] != s[1] || dataFormat[2] != s[2] || dataFormat[3] != s[3] + || dataFormat[4] != s[4] || dataFormat[5] != s[5]) { + return -1; + } + ////////// + return 0; +} diff --git a/knot/Implementations/crypto_aead/knot256/armcortexm_3/api.h b/knot/Implementations/crypto_aead/knot256/armcortexm_3/api.h new file mode 100644 index 0000000..b26e378 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot256/armcortexm_3/api.h @@ -0,0 +1,8 @@ +#define CRYPTO_KEYBYTES 32 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 32 +#define CRYPTO_ABYTES 32 +#define CRYPTO_NOOVERLAP 1 + + + diff --git a/knot/Implementations/crypto_aead/knot256/armcortexm_3/auxFormat.h b/knot/Implementations/crypto_aead/knot256/armcortexm_3/auxFormat.h new file mode 100644 index 0000000..969d758 --- /dev/null +++ 
b/knot/Implementations/crypto_aead/knot256/armcortexm_3/auxFormat.h @@ -0,0 +1,114 @@ + +#include"crypto_aead.h" +#include"api.h" +#include +#define U32BIG(x) (x) + + +#define ARR_SIZE(a) (sizeof((a))/sizeof((a[0]))) +#define LOTR32(x,n) (((x)<<(n))|((x)>>(32-(n)))) + + +#define sbox(a, b, c, d, e, f, g, h) \ +{ \ + t1 = ~a; t2 = b & t1;t3 = c ^ t2; h = d ^ t3; t5 = b | c; t6 = d ^ t1; g = t5 ^ t6; t8 = b ^ d; t9 = t3 & t6; e = t8 ^ t9; t11 = g & t8; f = t3 ^ t11; \ +} + +typedef unsigned char u8; +typedef unsigned int u32; +typedef unsigned long long u64; +void printU8(char name[], u8 var[], long len, int offset); + + +#define puck32(in)\ +{\ +t9 = (in ^ (in >> 1)) & 0x22222222; in ^= t9 ^ (t9 << 1);\ +t9 = (in ^ (in >> 2)) & 0x0C0C0C0C; in ^= t9 ^ (t9 << 2);\ +t9 = (in ^ (in >> 4)) & 0x00F000F0; in ^= t9 ^ (t9 << 4);\ +t9 = (in ^ (in >> 8)) & 0x0000FF00; in ^= t9 ^ (t9 << 8);\ +} + +#define unpuck32(t0){\ + t9 = (t0 ^ (t0 >> 8)) & 0x0000FF00, t0 ^= t9 ^ (t9 << 8); \ + t9 = (t0 ^ (t0 >> 4)) & 0x00F000F0, t0 ^= t9 ^ (t9 << 4); \ + t9 = (t0 ^ (t0 >> 2)) & 0x0C0C0C0C, t0 ^= t9 ^ (t9 << 2); \ + t9 = (t0 ^ (t0 >> 1)) & 0x22222222, t0 ^= t9 ^ (t9 << 1); \ +} + +#define packU128FormatToFourPacket(out,in) {\ + t8 = U32BIG(((u32*)in)[0]); \ + t1 = U32BIG(((u32*)in)[1]); \ + t2 = U32BIG(((u32*)in)[2]); \ + t3 = U32BIG(((u32*)in)[3]); \ + puck32(t8); puck32(t8); \ + puck32(t1); puck32(t1); \ + puck32(t2); puck32(t2); \ + puck32(t3); puck32(t3); \ + out[3] = t3 & 0xff000000 | ((t2 >> 8) & 0x00ff0000) | ((t1 >> 16) & 0x0000ff00) | (t8 >> 24); \ + out[2] = ((t3 << 8) & 0xff000000) | (t2 & 0x00ff0000) | ((t1 >> 8) & 0x0000ff00) | ((t8 >> 16) & 0x000000ff); \ + out[1] = ((t3 << 16) & 0xff000000) | ((t2 << 8) & 0x00ff0000) | (t1 & 0x0000ff00) | ((t8 >> 8) & 0x000000ff); \ + out[0] = ((t3 << 24) & 0xff000000) | ((t2 << 16) & 0x00ff0000) | ((t1 << 8) & 0x0000ff00) | (t8 & 0x000000ff); \ +} + +#define unpackU128FormatToFourPacket( out, in) {\ +memcpy(dataFormat, in, sizeof(unsigned int) * 4); \ +t3 = dataFormat[3] & 0xff000000 | ((dataFormat[2] >> 8) & 0x00ff0000) | ((dataFormat[1] >> 16) & 0x0000ff00) | (dataFormat[0] >> 24); \ +t2 = ((dataFormat[3] << 8) & 0xff000000) | (dataFormat[2] & 0x00ff0000) | ((dataFormat[1] >> 8) & 0x0000ff00) | ((dataFormat[0] >> 16) & 0x000000ff); \ +t1 = ((dataFormat[3] << 16) & 0xff000000) | ((dataFormat[2] << 8) & 0x00ff0000) | (dataFormat[1] & 0x0000ff00) | ((dataFormat[0] >> 8) & 0x000000ff); \ +t8 = ((dataFormat[3] << 24) & 0xff000000) | ((dataFormat[2] << 16) & 0x00ff0000) | ((dataFormat[1] << 8) & 0x0000ff00) | (dataFormat[0] & 0x000000ff); \ +unpuck32(t8); unpuck32(t8); \ +unpuck32(t1); unpuck32(t1); \ +unpuck32(t2); unpuck32(t2); \ +unpuck32(t3); unpuck32(t3); \ +((u32*)out)[0] = U32BIG(t8); \ +((u32*)out)[1] = U32BIG(t1); \ +((u32*)out)[2] = U32BIG(t2); \ +((u32*)out)[3] = U32BIG(t3); \ +} + +#define packU64FormatToFourPacket( out, in) {\ +t1 = U32BIG(((u32*)in)[0]); \ +t2 = U32BIG(((u32*)in)[1]); \ +puck32(t1); \ +puck32(t1); \ +puck32(t2); \ +puck32(t2); \ +out[3] = ((t2 >> 16) & 0x0000ff00) | ((t1 >> 24)); \ +out[2] = ((t2 >> 8) & 0x0000ff00) | ((t1 >> 16) & 0x000000ff); \ +out[1] = (t2 & 0x0000ff00) | ((t1 >> 8) & 0x000000ff); \ +out[0] = ((t2 << 8) & 0x0000ff00) | (t1 & 0x000000ff); \ +} +#define BIT_LOTR32_1(t0,t1,t2,t3,t4,t5,t6,t7){\ +t4= LOTR32(t3, 1);\ +t5 = t0;\ +t6 = t1; \ +t7 = t2; \ +} +#define BIT_LOTR32_16(t0,t1,t2,t3,t4,t5,t6,t7){\ +t4= LOTR32(t0, 4);\ +t5 = LOTR32(t1, 4);\ +t6 = LOTR32(t2, 4); \ +t7 = LOTR32(t3, 4); \ +} +#define 
BIT_LOTR32_25(t0,t1,t2,t3,t4,t5,t6,t7){\ +t4= LOTR32(t3, 7);\ +t5 = LOTR32(t0, 6);\ +t6 = LOTR32(t1, 6); \ +t7 = LOTR32(t2, 6); \ +} + +#define ROUND512( arr,lunNum) {\ +s[3] ^= (arr[lunNum] >> 6) & 0x3;\ +s[2] ^= (arr[lunNum] >> 4) & 0x3;\ +s[1] ^= (arr[lunNum] >> 2) & 0x3;\ +s[0] ^= arr[lunNum] & 0x3;\ +sbox(s[0], s[4], s[8], s[12], s_temp[0], s_temp[4], s_temp[8], s_temp[12]);\ +sbox(s[1], s[5], s[9], s[13], s_temp[1], s_temp[5], s_temp[9], s_temp[13]);\ +sbox(s[2], s[6], s[10], s[14], s_temp[2], s_temp[6], s_temp[10], s_temp[14]);\ +sbox(s[3], s[7], s[11], s[15], s_temp[3], s_temp[7], s_temp[11], s_temp[15]);\ +s[0] = s_temp[0], s[1] = s_temp[1], s[2] = s_temp[2], s[3] = s_temp[3];\ +BIT_LOTR32_1(s_temp[4], s_temp[5], s_temp[6], s_temp[7], s[4], s[5], s[6], s[7]);\ +BIT_LOTR32_16(s_temp[8], s_temp[9], s_temp[10], s_temp[11], s[8], s[9], s[10], s[11]);\ +BIT_LOTR32_25(s_temp[12], s_temp[13], s_temp[14], s_temp[15], s[12], s[13], s[14], s[15]);\ +} + diff --git a/knot/Implementations/crypto_aead/knot256/armcortexm_3/crypto_aead.h b/knot/Implementations/crypto_aead/knot256/armcortexm_3/crypto_aead.h new file mode 100644 index 0000000..cdfdf19 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot256/armcortexm_3/crypto_aead.h @@ -0,0 +1,17 @@ +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k +); + +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k +); diff --git a/knot/Implementations/crypto_aead/knot256/armcortexm_3/encrypt.c b/knot/Implementations/crypto_aead/knot256/armcortexm_3/encrypt.c new file mode 100644 index 0000000..c9ef428 --- /dev/null +++ b/knot/Implementations/crypto_aead/knot256/armcortexm_3/encrypt.c @@ -0,0 +1,330 @@ + +#include"auxFormat.h" + +#define aead_RATE (128 / 8) +#define PR0_ROUNDS 100 +#define PR_ROUNDS 52 +#define PRF_ROUNDS 56 +unsigned char constant7Format_aead[127] = { + /*constant7_aead_256*/ +0x1, +0x4, +0x10, +0x40, +0x2, +0x8, +0x21, +0x5, +0x14, +0x50, +0x42, +0xa, +0x29, +0x24, +0x11, +0x44, +0x12, +0x48, +0x23, +0xd, +0x35, +0x55, +0x56, +0x5a, +0x6b, +0x2e, +0x38, +0x60, +0x3, +0xc, +0x31, +0x45, +0x16, +0x58, +0x63, +0xf, +0x3d, +0x74, +0x53, +0x4e, +0x3b, +0x6c, +0x32, +0x49, +0x27, +0x1d, +0x75, +0x57, +0x5e, +0x7b, +0x6e, +0x3a, +0x68, +0x22, +0x9, +0x25, +0x15, +0x54, +0x52, +0x4a, +0x2b, +0x2c, +0x30, +0x41, +0x6, +0x18, +0x61, +0x7, +0x1c, +0x71, +0x47, +0x1e, +0x79, +0x66, +0x1b, +0x6d, +0x36, +0x59, +0x67, +0x1f, +0x7d, +0x76, +0x5b, +0x6f, +0x3e, +0x78, +0x62, +0xb, +0x2d, +0x34, +0x51, +0x46, +0x1a, +0x69, +0x26, +0x19, +0x65, +0x17, +0x5c, +0x73, +0x4f, +0x3f, +0x7c, +0x72, +0x4b, +0x2f, +0x3c, +0x70, +0x43, +0xe, +0x39, +0x64, +0x13, +0x4c, +0x33, +0x4d, +0x37, +0x5d, +0x77, +0x5f, +0x7f, +0x7e, +0x7a, +0x6a, +0x2a, +0x28, +0x20, +}; + + + +int crypto_aead_encrypt( + unsigned char *c, unsigned long long *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k +) { + u32 i, j; + u32 s_temp[16] = { 0 }; + u32 t1, t2, t3, t5, t6, t8, t9, t11; + // initialization + u32 s[16] = { 0 }; + u32 
dataFormat[4] = { 0 }; + u8 tempData[16] = {0}; + *clen = mlen + CRYPTO_ABYTES; + //initialization + packU128FormatToFourPacket(s, npub); + packU128FormatToFourPacket((s + 4), (npub + 16)); + packU128FormatToFourPacket((s + 8), k); + packU128FormatToFourPacket((s + 12), (k + 16)); + for (i = 0; i < PR0_ROUNDS; i++) { + ROUND512(constant7Format_aead,i); + } + // process associated data + if (adlen) { + while (adlen >= aead_RATE) { + packU128FormatToFourPacket(dataFormat, ad); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + s[3] ^= dataFormat[3]; + for (i = 0; i < PR_ROUNDS; i++) { + ROUND512(constant7Format_aead, i); + } + adlen -= aead_RATE; + ad += aead_RATE; + } + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, ad, adlen * sizeof(unsigned char)); + tempData[adlen] = 0x01; + packU128FormatToFourPacket(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + s[3] ^= dataFormat[3]; + for (i = 0; i < PR_ROUNDS; i++) { + ROUND512(constant7Format_aead, i); + } + } + s[15] ^= 0x80000000; + if (mlen) { + while (mlen >= aead_RATE) { + packU128FormatToFourPacket(dataFormat, m); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + s[3] ^= dataFormat[3]; + unpackU128FormatToFourPacket(c, s); + for (i = 0; i < PR_ROUNDS; i++) { + ROUND512(constant7Format_aead, i); + } + mlen -= aead_RATE; + m += aead_RATE; + c += aead_RATE; + } + memset(tempData, 0, sizeof(tempData)); + memcpy(tempData, m, mlen * sizeof(unsigned char)); + tempData[mlen]= 0x01; + packU128FormatToFourPacket(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + s[3] ^= dataFormat[3]; + unpackU128FormatToFourPacket(tempData, s); + memcpy(c, tempData, mlen * sizeof(unsigned char)); + c += mlen; + } + // finalization + for (i = 0; i < PRF_ROUNDS; i++) { + ROUND512(constant7Format_aead, i); + } + // return tag + unpackU128FormatToFourPacket(c, s); + unpackU128FormatToFourPacket((c+16), (s+4)); + return 0; +} + +int crypto_aead_decrypt( + unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k +){ + u32 s_temp[16] = { 0 }; + u32 t1, t2, t3, t5, t6, t8, t9, t11; + u8 i, j; + // initialization + u32 s[16] = { 0 }; + u32 dataFormat[4] = { 0 }; + u32 dataFormat_1[4] = { 0 }; + u32 dataFormat_2[4] = { 0 }; + u8 tempData[16] = { 0 }; + u8 tempU8[64] = { 0 }; + + if (clen < CRYPTO_ABYTES) + return -1; + *mlen = clen - CRYPTO_ABYTES; + //initialization + packU128FormatToFourPacket(s, npub); + packU128FormatToFourPacket((s + 4), (npub + 16)); + packU128FormatToFourPacket((s + 8), k); + packU128FormatToFourPacket((s + 12), (k + 16)); + for (i = 0; i < PR0_ROUNDS; i++) { + ROUND512(constant7Format_aead, i); + } + // process associated data + if (adlen) { + while (adlen >= aead_RATE) { + packU128FormatToFourPacket(dataFormat, ad); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + s[3] ^= dataFormat[3]; + for (i = 0; i < PR_ROUNDS; i++) { + ROUND512(constant7Format_aead, i); + } + adlen -= aead_RATE; + ad += aead_RATE; + } + memset(tempData, 0, sizeof(tempData)); + + memcpy(tempData, ad, adlen * sizeof(unsigned char)); + tempData[adlen] = 0x01; + packU128FormatToFourPacket(dataFormat, tempData); + s[0] ^= dataFormat[0]; + s[1] ^= dataFormat[1]; + s[2] ^= dataFormat[2]; + s[3] ^= dataFormat[3]; + for (i = 0; i < 
PR_ROUNDS; i++) { + ROUND512(constant7Format_aead, i); + } + } + s[15] ^= 0x80000000; + clen = clen - CRYPTO_KEYBYTES; + + if (clen) { + while (clen >= aead_RATE) { + packU128FormatToFourPacket(dataFormat_2, c); + dataFormat_1[0] = s[0] ^ dataFormat_2[0]; + dataFormat_1[1] = s[1] ^ dataFormat_2[1]; + dataFormat_1[2] = s[2] ^ dataFormat_2[2]; + dataFormat_1[3] = s[3] ^ dataFormat_2[3]; + unpackU128FormatToFourPacket(m, dataFormat_1); + s[0] = dataFormat_2[0]; + s[1] = dataFormat_2[1]; + s[2] = dataFormat_2[2]; + s[3] = dataFormat_2[3]; + for (i = 0; i < PR_ROUNDS; i++) { + ROUND512(constant7Format_aead, i); + } + clen -= aead_RATE; + m += aead_RATE; + c += aead_RATE; + } + unpackU128FormatToFourPacket(tempU8, s); + for (i = 0; i < clen; ++i, ++m, ++c) + { + *m = tempU8[i] ^ *c; + tempU8[i] = *c; + } + tempU8[i] ^= 0x01; + packU128FormatToFourPacket(s, tempU8); + } + // finalization + for (i = 0; i < PRF_ROUNDS; i++) { + ROUND512(constant7Format_aead, i); + } + // return tag + packU128FormatToFourPacket(dataFormat, c); + packU128FormatToFourPacket(dataFormat_1, (c + 16)); + if (dataFormat[0] != s[0] || dataFormat[1] != s[1] || dataFormat[2] != s[2] || dataFormat[3] != s[3] + || dataFormat_1[0] != s[4] || dataFormat_1[1] != s[5] || dataFormat_1[2] != s[6] || dataFormat_1[3] != s[7]) { + return -1; + } + return 0; + +} \ No newline at end of file diff --git a/romulus/Implementations/crypto_aead/romulusm1+/opt32a_NEC/api.h b/romulus/Implementations/crypto_aead/romulusm1+/opt32a_NEC/api.h new file mode 100644 index 0000000..a4aa567 --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusm1+/opt32a_NEC/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 diff --git a/romulus/Implementations/crypto_aead/romulusm1+/opt32a_NEC/encrypt.c b/romulus/Implementations/crypto_aead/romulusm1+/opt32a_NEC/encrypt.c new file mode 100644 index 0000000..495399b --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusm1+/opt32a_NEC/encrypt.c @@ -0,0 +1,1337 @@ +/* + * Date: 29 November 2018 + * Contact: Thomas Peyrin - thomas.peyrin@gmail.com + * Mustafa Khairallah - mustafam001@e.ntu.edu.sg + */ + +#include "crypto_aead.h" +#include "api.h" +#include "skinny.h" +#include +#include + +void pad (const unsigned char* m, unsigned char* mp, int len8) { + +#ifdef ___ENABLE_DWORD_CAST + + if (0 == len8) { + *(uint64_t*)(&mp[0]) = 0; + *(uint64_t*)(&mp[8]) = 0; + } else if (8 > len8) { + *(uint64_t*)(&mp[0]) = *(uint64_t*)(&m[0]) & (0xffffffffffffffff >> (64 - len8*8)); + *(uint64_t*)(&mp[8]) = 0; + mp[15] = len8; + } else if (8 == len8) { + *(uint64_t*)(&mp[0]) = *(uint64_t*)(&m[0]); + *(uint64_t*)(&mp[8]) = 0; + mp[15] = 8; + } else if (16 > len8) { + *(uint64_t*)(&mp[0]) = *(uint64_t*)(&m[0]); + *(uint64_t*)(&mp[8]) = *(uint64_t*)(&m[8]) & (0xffffffffffffffff >> (128 - len8*8)); + mp[15] = len8; + } else { + *(uint64_t*)(&mp[0]) = *(uint64_t*)(&m[0]); + *(uint64_t*)(&mp[8]) = *(uint64_t*)(&m[8]); + } + +#else + + if (0 == len8) { + *(uint32_t*)(&mp[0]) = 0; + *(uint32_t*)(&mp[4]) = 0; + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + } else if (4 > len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]) & (0xffffffff >> (32 - len8*8)); + *(uint32_t*)(&mp[4]) = 0; + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + mp[15] = len8; + } else if (4 == len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = 0; + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + 
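/*
 * pad() zero-extends a partial block to 16 bytes and records the block length
 * in the last byte (mp[15] = len8); a full 16-byte block is copied unchanged.
 * The word loads above fetch whole 4-byte words and mask off the bytes beyond
 * len8. A byte-wise equivalent, as an illustrative sketch only (not the code
 * used by this implementation):
 *
 *   memcpy(mp, m, len8);
 *   memset(mp + len8, 0, 16 - len8);
 *   if (len8 < 16) mp[15] = (unsigned char)len8;
 */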
mp[15] = 4; + } else if (8 > len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]) & (0xffffffff >> (64 - len8*8)); + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + mp[15] = len8; + } else if (8 == len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + mp[15] = 8; + } else if (12 > len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = *(uint32_t*)(&m[8]) & (0xffffffff >> (96 - len8*8)); + *(uint32_t*)(&mp[12]) = 0; + mp[15] = len8; + } else if (12 == len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = *(uint32_t*)(&m[8]); + *(uint32_t*)(&mp[12]) = 0; + mp[15] = 12; + } else if (16 > len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = *(uint32_t*)(&m[8]); + *(uint32_t*)(&mp[12]) = *(uint32_t*)(&m[12]) & (0xffffffff >> (128 - len8*8)); + mp[15] = len8; + } else { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = *(uint32_t*)(&m[8]); + *(uint32_t*)(&mp[12]) = *(uint32_t*)(&m[12]); + } + +#endif + +} + +void g8A (unsigned char* s, unsigned char* c) { + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t s0 = *(uint64_t*)(&s[0]); + uint64_t s1 = *(uint64_t*)(&s[8]); + + uint64_t c0, c1; + + c0 = ((s0 >> 1) & 0x7f7f7f7f7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x8080808080808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x8080808080808080); + + *(uint64_t*)(&c[0]) = c0; + *(uint64_t*)(&c[8]) = c1; + +#else + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t c0, c1, c2, c3; + + c0 = ((s0 >> 1) & 0x7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x80808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x80808080); + c2 = ((s2 >> 1) & 0x7f7f7f7f) ^ ((s2 ^ (s2 << 7)) & 0x80808080); + c3 = ((s3 >> 1) & 0x7f7f7f7f) ^ ((s3 ^ (s3 << 7)) & 0x80808080); + + *(uint32_t*)(&c[0]) = c0; + *(uint32_t*)(&c[4]) = c1; + *(uint32_t*)(&c[8]) = c2; + *(uint32_t*)(&c[12]) = c3; + +#endif + +} + +void g8A_for_Tag_Generation (unsigned char* s, unsigned char* c) { + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t s0 = *(uint64_t*)(&s[0]); + uint64_t s1 = *(uint64_t*)(&s[8]); + + uint64_t c0, c1; + + c0 = ((s0 >> 1) & 0x7f7f7f7f7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x8080808080808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x8080808080808080); + + // use byte access because of memory alignment. + // c is not always in word(4 byte) alignment. 
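/*
 * g8A applies the byte-wise feedback function G of the rho update to all 16
 * state bytes in parallel (SWAR): for each byte b, output bits 0..6 are b's
 * bits 1..7 and output bit 7 is b7 ^ b0. A single-byte equivalent,
 * illustrative sketch only:
 *
 *   unsigned char g8(unsigned char b) {
 *     return (unsigned char)(((b >> 1) & 0x7f) ^ ((b ^ (unsigned char)(b << 7)) & 0x80));
 *   }
 *
 * The result is then stored byte by byte because, as noted above, c need not
 * be word-aligned at this point.
 */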
+ c[0] = c0 &0xFF; + c[1] = (c0>>8) &0xFF; + c[2] = (c0>>16)&0xFF; + c[3] = (c0>>24)&0xFF; + c[4] = (c0>>32)&0xFF; + c[5] = (c0>>40)&0xFF; + c[6] = (c0>>48)&0xFF; + c[7] = c0>>56; + c[8] = c1 &0xFF; + c[9] = (c1>>8) &0xFF; + c[10] = (c1>>16)&0xFF; + c[11] = (c1>>24)&0xFF; + c[12] = (c1>>32)&0xFF; + c[13] = (c1>>40)&0xFF; + c[14] = (c1>>48)&0xFF; + c[15] = c1>>56; + +#else + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t c0, c1, c2, c3; + + c0 = ((s0 >> 1) & 0x7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x80808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x80808080); + c2 = ((s2 >> 1) & 0x7f7f7f7f) ^ ((s2 ^ (s2 << 7)) & 0x80808080); + c3 = ((s3 >> 1) & 0x7f7f7f7f) ^ ((s3 ^ (s3 << 7)) & 0x80808080); + + // use byte access because of memory alignment. + // c is not always in word(4 byte) alignment. + c[0] = c0 &0xFF; + c[1] = (c0>>8) &0xFF; + c[2] = (c0>>16)&0xFF; + c[3] = c0>>24; + c[4] = c1 &0xFF; + c[5] = (c1>>8) &0xFF; + c[6] = (c1>>16)&0xFF; + c[7] = c1>>24; + c[8] = c2 &0xFF; + c[9] = (c2>>8) &0xFF; + c[10] = (c2>>16)&0xFF; + c[11] = c2>>24; + c[12] = c3 &0xFF; + c[13] = (c3>>8) &0xFF; + c[14] = (c3>>16)&0xFF; + c[15] = c3>>24; + +#endif + +} + +void rho_ad_eqov16 ( + const unsigned char* m, + unsigned char* s) { + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) ^= *(uint64_t*)(&m[0]); + *(uint64_t*)(&s[8]) ^= *(uint64_t*)(&m[8]); + +#else + + *(uint32_t*)(&s[0]) ^= *(uint32_t*)(&m[0]); + *(uint32_t*)(&s[4]) ^= *(uint32_t*)(&m[4]); + *(uint32_t*)(&s[8]) ^= *(uint32_t*)(&m[8]); + *(uint32_t*)(&s[12]) ^= *(uint32_t*)(&m[12]); + +#endif + +} + +void rho_ad_ud16 ( + const unsigned char* m, + unsigned char* s, + int len8) { + + unsigned char mp [16]; + pad(m,mp,len8); + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) ^= *(uint64_t*)(&mp[0]); + *(uint64_t*)(&s[8]) ^= *(uint64_t*)(&mp[8]); + +#else + + *(uint32_t*)(&s[0]) ^= *(uint32_t*)(&mp[0]); + *(uint32_t*)(&s[4]) ^= *(uint32_t*)(&mp[4]); + *(uint32_t*)(&s[8]) ^= *(uint32_t*)(&mp[8]); + *(uint32_t*)(&s[12]) ^= *(uint32_t*)(&mp[12]); + +#endif + +} + +void rho_eqov16 ( + const unsigned char* m, + unsigned char* c, + unsigned char* s) { + + g8A(s,c); + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t c0 = *(uint64_t*)(&c[0]); + uint64_t c1 = *(uint64_t*)(&c[8]); + + uint64_t s0 = *(uint64_t*)(&s[0]); + uint64_t s1 = *(uint64_t*)(&s[8]); + + uint64_t m0 = *(uint64_t*)(&m[0]); + uint64_t m1 = *(uint64_t*)(&m[8]); + + s0 ^= m0; + s1 ^= m1; + + c0 ^= m0; + c1 ^= m1; + + *(uint64_t*)(&s[0]) = s0; + *(uint64_t*)(&s[8]) = s1; + + *(uint64_t*)(&c[0]) = c0; + *(uint64_t*)(&c[8]) = c1; + +#else + + uint32_t c0 = *(uint32_t*)(&c[0]); + uint32_t c1 = *(uint32_t*)(&c[4]); + uint32_t c2 = *(uint32_t*)(&c[8]); + uint32_t c3 = *(uint32_t*)(&c[12]); + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t m0 = *(uint32_t*)(&m[0]); + uint32_t m1 = *(uint32_t*)(&m[4]); + uint32_t m2 = *(uint32_t*)(&m[8]); + uint32_t m3 = *(uint32_t*)(&m[12]); + + s0 ^= m0; + s1 ^= m1; + s2 ^= m2; + s3 ^= m3; + + c0 ^= m0; + c1 ^= m1; + c2 ^= m2; + c3 ^= m3; + + *(uint32_t*)(&s[0]) = s0; + *(uint32_t*)(&s[4]) = s1; + *(uint32_t*)(&s[8]) = s2; + *(uint32_t*)(&s[12]) = s3; + + *(uint32_t*)(&c[0]) = c0; + *(uint32_t*)(&c[4]) = c1; + *(uint32_t*)(&c[8]) = c2; + *(uint32_t*)(&c[12]) = c3; + +#endif + +} + +void rho_ud16 ( + const unsigned char* m, + unsigned 
char* c, + unsigned char* s, + int len8) { + + unsigned char mp [16]; + + pad(m,mp,len8); + + g8A(s,c); + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t mp0 = *(uint64_t*)&mp[0]; + uint64_t mp1 = *(uint64_t*)&mp[8]; + uint64_t c0 = *(uint64_t*)&c[0]; + uint64_t c1 = *(uint64_t*)&c[8]; + + *(uint64_t*)(&s[0]) ^= mp0; + *(uint64_t*)(&s[8]) ^= mp1; + + if (0 == len8) { + c0 = 0; + c1 = 0; + } else if (8 > len8) { + c0 = c0 ^ (mp0 & 0xffffffffffffffff >> (64 - (len8*8))); + c0 = c0 ^ (c0 & 0xffffffffffffffff << ( (len8*8))); + c1 = 0; + } else if (8 == len8) { + c0 = c0 ^ mp0; + c1 = 0; + } else if (16 > len8) { + len8 -= 8; + c0 = c0 ^ mp0; + c1 = c1 ^ (mp1 & 0xffffffffffffffff >> (64 - (len8*8))); + c1 = c1 ^ (c1 & 0xffffffffffffffff << ( (len8*8))); + } else { + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + } + + *(uint64_t*)&c[0] = c0; + *(uint64_t*)&c[8] = c1; + +#else + + uint32_t mp0 = *(uint32_t*)&mp[0]; + uint32_t mp1 = *(uint32_t*)&mp[4]; + uint32_t mp2 = *(uint32_t*)&mp[8]; + uint32_t mp3 = *(uint32_t*)&mp[12]; + uint32_t c0 = *(uint32_t*)&c[0]; + uint32_t c1 = *(uint32_t*)&c[4]; + uint32_t c2 = *(uint32_t*)&c[8]; + uint32_t c3 = *(uint32_t*)&c[12]; + + *(uint32_t*)(&s[0]) ^= mp0; + *(uint32_t*)(&s[4]) ^= mp1; + *(uint32_t*)(&s[8]) ^= mp2; + *(uint32_t*)(&s[12]) ^= mp3; + + if (0 == len8) { + c0 = 0; + c1 = 0; + c2 = 0; + c3 = 0; + } else if (4 > len8) { + c0 = c0 ^ (mp0 & 0xffffffff >> (32 - (len8*8))); + c0 = c0 ^ (c0 & 0xffffffff << ( (len8*8))); + c1 = 0; + c2 = 0; + c3 = 0; + } else if (4 == len8) { + c0 = c0 ^ mp0; + c1 = 0; + c2 = 0; + c3 = 0; + } else if (8 > len8) { + len8 -= 4; + c0 = c0 ^ mp0; + c1 = c1 ^ (mp1 & 0xffffffff >> (32 - (len8*8))); + c1 = c1 ^ (c1 & 0xffffffff << ( (len8*8))); + c2 = 0; + c3 = 0; + } else if (8 == len8) { + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = 0; + c3 = 0; + } else if (12 > len8) { + len8 -= 8; + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = c2 ^ (mp2 & 0xffffffff >> (32 - (len8*8))); + c2 = c2 ^ (c2 & 0xffffffff << ( (len8*8))); + c3 = 0; + } else if (12 == len8) { + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = c2 ^ mp2; + c3 = 0; + } else if (16 > len8) { + len8 -= 12; + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = c2 ^ mp2; + c3 = c3 ^ (mp3 & 0xffffffff >> (32 - (len8*8))); + c3 = c3 ^ (c3 & 0xffffffff << ( (len8*8))); + } else { + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = c2 ^ mp2; + c3 = c3 ^ mp3; + } + + *(uint32_t*)&c[0] = c0; + *(uint32_t*)&c[4] = c1; + *(uint32_t*)&c[8] = c2; + *(uint32_t*)&c[12] = c3; + +#endif + +} + +void irho_eqov16 ( + unsigned char* m, + const unsigned char* c, + unsigned char* s) { + + g8A(s,m); + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t c0 = *(uint64_t*)(&c[0]); + uint64_t c1 = *(uint64_t*)(&c[8]); + + uint64_t s0 = *(uint64_t*)(&s[0]); + uint64_t s1 = *(uint64_t*)(&s[8]); + + uint64_t m0 = *(uint64_t*)(&m[0]); + uint64_t m1 = *(uint64_t*)(&m[8]); + + s0 ^= c0 ^ m0; + s1 ^= c1 ^ m1; + + m0 ^= c0; + m1 ^= c1; + + *(uint64_t*)(&s[0]) = s0; + *(uint64_t*)(&s[8]) = s1; + + *(uint64_t*)(&m[0]) = m0; + *(uint64_t*)(&m[8]) = m1; + +#else + + uint32_t c0 = *(uint32_t*)(&c[0]); + uint32_t c1 = *(uint32_t*)(&c[4]); + uint32_t c2 = *(uint32_t*)(&c[8]); + uint32_t c3 = *(uint32_t*)(&c[12]); + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t m0 = *(uint32_t*)(&m[0]); + uint32_t m1 = *(uint32_t*)(&m[4]); + uint32_t m2 = *(uint32_t*)(&m[8]); + uint32_t m3 = *(uint32_t*)(&m[12]); + + s0 ^= c0 ^ m0; + s1 ^= c1 ^ m1; + s2 ^= c2 ^ m2; + s3 ^= c3 ^ m3; + 
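/*
 * Inverse rho: the g8A() call above placed the keystream G(S) into m, so the
 * state update just performed is S' = S ^ C ^ G(S); the XOR with c below then
 * recovers M = C ^ G(S). Since encryption computes C = G(S) ^ M and updates
 * S' = S ^ M, both directions end with the same state: S ^ C ^ G(S) = S ^ M.
 */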
+ m0 ^= c0; + m1 ^= c1; + m2 ^= c2; + m3 ^= c3; + + *(uint32_t*)(&s[0]) = s0; + *(uint32_t*)(&s[4]) = s1; + *(uint32_t*)(&s[8]) = s2; + *(uint32_t*)(&s[12]) = s3; + + *(uint32_t*)(&m[0]) = m0; + *(uint32_t*)(&m[4]) = m1; + *(uint32_t*)(&m[8]) = m2; + *(uint32_t*)(&m[12]) = m3; + +#endif + +} + +void irho_ud16 ( + unsigned char* m, + const unsigned char* c, + unsigned char* s, + int len8) { + + unsigned char cp [16]; + + pad(c,cp,len8); + + g8A(s,m); + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t cp0 = *(uint64_t*)&cp[0]; + uint64_t cp1 = *(uint64_t*)&cp[8]; + uint64_t m0 = *(uint64_t*)&m[0]; + uint64_t m1 = *(uint64_t*)&m[8]; + uint64_t s0 = *(uint64_t*)&s[0]; + uint64_t s1 = *(uint64_t*)&s[8]; + + s0 ^= cp0; + s1 ^= cp1; + + if (0 == len8) { + m0 = 0; + m1 = 0; + } else if (8 > len8) { + s0 = s0 ^ (m0 & 0xffffffffffffffff >> (64 - (len8*8))); + + m0 = m0 ^ (cp0 & 0xffffffffffffffff >> (64 - (len8*8))); + m0 = m0 ^ (m0 & 0xffffffffffffffff << ( (len8*8))); + m1 = 0; + } else if (8 == len8) { + s0 = s0 ^ m0; + + m0 = m0 ^ cp0; + m1 = 0; + } else if (16 > len8) { + len8 -= 8; + s0 = s0 ^ m0; + s1 = s1 ^ (m1 & 0xffffffffffffffff >> (64 - (len8*8))); + + m0 = m0 ^ cp0; + m1 = m1 ^ (cp1 & 0xffffffffffffffff >> (64 - (len8*8))); + m1 = m1 ^ (m1 & 0xffffffffffffffff << ( (len8*8))); + } else { + s0 = s0 ^ m0; + s1 = s1 ^ m1; + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + } + + *(uint64_t*)&s[0] = s0; + *(uint64_t*)&s[8] = s1; + *(uint64_t*)&m[0] = m0; + *(uint64_t*)&m[8] = m1; + +#else + + uint32_t cp0 = *(uint32_t*)&cp[0]; + uint32_t cp1 = *(uint32_t*)&cp[4]; + uint32_t cp2 = *(uint32_t*)&cp[8]; + uint32_t cp3 = *(uint32_t*)&cp[12]; + uint32_t m0 = *(uint32_t*)&m[0]; + uint32_t m1 = *(uint32_t*)&m[4]; + uint32_t m2 = *(uint32_t*)&m[8]; + uint32_t m3 = *(uint32_t*)&m[12]; + uint32_t s0 = *(uint32_t*)&s[0]; + uint32_t s1 = *(uint32_t*)&s[4]; + uint32_t s2 = *(uint32_t*)&s[8]; + uint32_t s3 = *(uint32_t*)&s[12]; + + s0 ^= cp0; + s1 ^= cp1; + s2 ^= cp2; + s3 ^= cp3; + + if (0 == len8) { + m0 = 0; + m1 = 0; + m2 = 0; + m3 = 0; + } else if (4 > len8) { + s0 = s0 ^ (m0 & 0xffffffff >> (32 - (len8*8))); + + m0 = m0 ^ (cp0 & 0xffffffff >> (32 - (len8*8))); + m0 = m0 ^ (m0 & 0xffffffff << ( (len8*8))); + m1 = 0; + m2 = 0; + m3 = 0; + } else if (4 == len8) { + s0 = s0 ^ m0; + + m0 = m0 ^ cp0; + m1 = 0; + m2 = 0; + m3 = 0; + } else if (8 > len8) { + len8 -= 4; + s0 = s0 ^ m0; + s1 = s1 ^ (m1 & 0xffffffff >> (32 - (len8*8))); + + m0 = m0 ^ cp0; + m1 = m1 ^ (cp1 & 0xffffffff >> (32 - (len8*8))); + m1 = m1 ^ (m1 & 0xffffffff << ( (len8*8))); + m2 = 0; + m3 = 0; + } else if (8 == len8) { + s0 = s0 ^ m0; + s1 = s1 ^ m1; + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + m2 = 0; + m3 = 0; + } else if (12 > len8) { + len8 -= 8; + s0 = s0 ^ m0; + s1 = s1 ^ m1; + s2 = s2 ^ (m2 & 0xffffffff >> (32 - (len8*8))); + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + m2 = m2 ^ (cp2 & 0xffffffff >> (32 - (len8*8))); + m2 = m2 ^ (m2 & 0xffffffff << ( (len8*8))); + m3 = 0; + } else if (12 == len8) { + s0 = s0 ^ m0; + s1 = s1 ^ m1; + s2 = s2 ^ m2; + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + m2 = m2 ^ cp2; + m3 = 0; + } else if (16 > len8) { + len8 -= 12; + s0 = s0 ^ m0; + s1 = s1 ^ m1; + s2 = s2 ^ m2; + s3 = s3 ^ (m3 & 0xffffffff >> (32 - (len8*8))); + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + m2 = m2 ^ cp2; + m3 = m3 ^ (cp3 & 0xffffffff >> (32 - (len8*8))); + m3 = m3 ^ (m3 & 0xffffffff << ( (len8*8))); + } else { + s0 = s0 ^ m0; + s1 = s1 ^ m1; + s2 = s2 ^ m2; + s3 = s3 ^ m3; + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + m2 = m2 ^ cp2; + m3 = m3 ^ cp3; + } + + *(uint32_t*)&s[0] = 
s0; + *(uint32_t*)&s[4] = s1; + *(uint32_t*)&s[8] = s2; + *(uint32_t*)&s[12] = s3; + *(uint32_t*)&m[0] = m0; + *(uint32_t*)&m[4] = m1; + *(uint32_t*)&m[8] = m2; + *(uint32_t*)&m[12] = m3; + +#endif + +} + +void reset_lfsr_gf56 (unsigned char* CNT) { + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&CNT[0]) = 0x0000000000000001; // CNT7 CNT6 CNT5 CNT4 CNT3 CNT2 CNT1 CNT0 + +#else + + *(uint32_t*)(&CNT[0]) = 0x00000001; // CNT3 CNT2 CNT1 CNT0 + *(uint32_t*)(&CNT[4]) = 0x00000000; // CNT7 CNT6 CNT5 CNT4 + +#endif + +} + +void lfsr_gf56 (unsigned char* CNT) { + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t C0; + uint64_t fb0; + + C0 = *(uint64_t*)(&CNT[0]); // CNT7 CNT6 CNT5 CNT4 CNT3 CNT2 CNT1 CNT0 + + fb0 = 0; + if (CNT[6] & 0x80) { + fb0 = 0x95; + } + + C0 = C0 << 1 ^ fb0; + + *(uint64_t*)(&CNT[0]) = C0; + +#else + + uint32_t C0; + uint32_t C1; + uint32_t fb0; + + C0 = *(uint32_t*)(&CNT[0]); // CNT3 CNT2 CNT1 CNT0 + C1 = *(uint32_t*)(&CNT[4]); // CNT7 CNT6 CNT5 CNT4 + + fb0 = 0; + if (CNT[6] & 0x80) { + fb0 = 0x95; + } + + C1 = C1 << 1 | C0 >> 31; + C0 = C0 << 1 ^ fb0; + + *(uint32_t*)(&CNT[0]) = C0; + *(uint32_t*)(&CNT[4]) = C1; + +#endif + +} + +void block_cipher( + unsigned char* s, + const unsigned char* k, unsigned char* T, + unsigned char* CNT, unsigned char D, + skinny_ctrl* p_skinny_ctrl) { + + CNT[7] = D; + p_skinny_ctrl->func_skinny_128_384_enc(s, p_skinny_ctrl, CNT, T, k); + +} + +void nonce_encryption ( + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + skinny_ctrl* p_skinny_ctrl) { + + block_cipher(s,k,(unsigned char*)N,CNT,D,p_skinny_ctrl); + +} + +void generate_tag ( + unsigned char** c, unsigned char* s, + unsigned long long* clen) { + + g8A_for_Tag_Generation(s, *c); + + *c = *c + 16; + *c = *c - *clen; + +} + +unsigned long long msg_encryption ( + const unsigned char** M, unsigned char** c, + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long mlen, + skinny_ctrl* l_skinny_ctrl) { + + int len8; + + if (mlen >= 16) { + len8 = 16; + mlen = mlen - 16; + rho_eqov16(*M, *c, s); + } + else { + len8 = mlen; + mlen = 0; + rho_ud16(*M, *c, s, len8); + } + *c = *c + len8; + *M = *M + len8; + lfsr_gf56(CNT); + if (mlen != 0) { + nonce_encryption(N,CNT,s,k,D,l_skinny_ctrl); + } + return mlen; + +} + +unsigned long long msg_decryption ( + unsigned char** M, const unsigned char** c, + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long clen, + skinny_ctrl* l_skinny_ctrl) { + + int len8; + + if (clen >= 16) { + len8 = 16; + clen = clen - 16; + irho_eqov16(*M, *c, s); + } + else { + len8 = clen; + clen = 0; + irho_ud16(*M, *c, s, len8); + } + *c = *c + len8; + *M = *M + len8; + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,D,l_skinny_ctrl); + return clen; + +} + +unsigned long long ad2msg_encryption ( + const unsigned char** M, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long mlen, + skinny_ctrl* l_skinny_ctrl) { + + unsigned char T [16]; + int len8; + + if (mlen <= 16) { + len8 = mlen; + mlen = 0; + } + else { + len8 = 16; + mlen = mlen - 16; + } + + pad (*M,T,len8); + block_cipher(s,k,T,CNT,D,l_skinny_ctrl); + lfsr_gf56(CNT); + *M = *M + len8; + + return mlen; + +} + +unsigned long long ad_encryption ( + const unsigned char** A, unsigned char* s, + const unsigned char* k, unsigned long long adlen, + unsigned char* CNT, + 
unsigned char D, + skinny_ctrl* l_skinny_ctrl) { + + unsigned char T [16]; + int len8; + + if (adlen >= 16) { + len8 = 16; + adlen = adlen - 16; + + rho_ad_eqov16(*A, s); + } + else { + len8 = adlen; + adlen = 0; + rho_ad_ud16(*A, s, len8); + } + *A = *A + len8; + lfsr_gf56(CNT); + if (adlen != 0) { + if (adlen >= 16) { + len8 = 16; + adlen = adlen - 16; + } + else { + len8 = adlen; + adlen = 0; + } + pad(*A, T, len8); + *A = *A + len8; + block_cipher(s,k,T,CNT,D,l_skinny_ctrl); + lfsr_gf56(CNT); + } + + return adlen; + +} + +int crypto_aead_encrypt ( + unsigned char* c, unsigned long long* clen, + const unsigned char* m, unsigned long long mlen, + const unsigned char* ad, unsigned long long adlen, + const unsigned char* nsec, + const unsigned char* npub, + const unsigned char* k) { + + unsigned char s[16]; + unsigned char CNT[8]; + unsigned char T[16]; + const unsigned char* N; + unsigned char w; + unsigned long long xlen; + + skinny_ctrl l_skinny_ctrl; + l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc123_12; + + (void)nsec; + N = npub; + + xlen = mlen; + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) = 0; + *(uint64_t*)(&s[8]) = 0; + +#else + + *(uint32_t*)(&s[0]) = 0; + *(uint32_t*)(&s[4]) = 0; + *(uint32_t*)(&s[8]) = 0; + *(uint32_t*)(&s[12]) = 0; + +#endif + + reset_lfsr_gf56(CNT); + + w = 48; + + if (adlen == 0) { + w = w ^ 2; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else if (adlen%(32) == 0) { + w = w ^ 8; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else if (adlen%(32) < 16) { + w = w ^ 2; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else if (adlen%(32) == 16) { + w = w ^ 0; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else { + w = w ^ 10; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + + if (adlen == 0) { // AD is an empty string + lfsr_gf56(CNT); + } + else while (adlen > 0) { + adlen = ad_encryption(&ad,s,k,adlen,CNT,40,&l_skinny_ctrl); + } + + if ((w & 8) == 0) { + xlen = ad2msg_encryption (&m,CNT,s,k,44,xlen,&l_skinny_ctrl); + } + else if (mlen == 0) { + lfsr_gf56(CNT); + } + while (xlen > 0) { + xlen = ad_encryption(&m,s,k,xlen,CNT,44,&l_skinny_ctrl); + } + nonce_encryption(N,CNT,s,k,w,&l_skinny_ctrl); + + // Tag generation + g8A(s, T); + + m = m - mlen; + + l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc1_1; + + reset_lfsr_gf56(CNT); + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) = *(uint64_t*)(&T[0]); + *(uint64_t*)(&s[8]) = *(uint64_t*)(&T[8]); + +#else + + *(uint32_t*)(&s[0]) = *(uint32_t*)(&T[0]); + *(uint32_t*)(&s[4]) = *(uint32_t*)(&T[4]); + *(uint32_t*)(&s[8]) = *(uint32_t*)(&T[8]); + *(uint32_t*)(&s[12]) = *(uint32_t*)(&T[12]); + +#endif + + *clen = mlen + 16; + + if (mlen > 0) { + nonce_encryption(N,CNT,s,k,36,&l_skinny_ctrl); + while (mlen 
> 16) { + mlen = msg_encryption(&m,&c,N,CNT,s,k,36,mlen,&l_skinny_ctrl); + } + rho_ud16(m, c, s, mlen); + c = c + mlen; + m = m + mlen; + } + + // Tag Concatenation + c[0] = T[0]; + c[1] = T[1]; + c[2] = T[2]; + c[3] = T[3]; + c[4] = T[4]; + c[5] = T[5]; + c[6] = T[6]; + c[7] = T[7]; + c[8] = T[8]; + c[9] = T[9]; + c[10] = T[10]; + c[11] = T[11]; + c[12] = T[12]; + c[13] = T[13]; + c[14] = T[14]; + c[15] = T[15]; + + c = c - *clen; + + return 0; + +} + +int crypto_aead_decrypt( + unsigned char *m,unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c,unsigned long long clen, + const unsigned char *ad,unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k) { + + unsigned char s[16]; + unsigned char CNT[8]; + unsigned char T[16]; + const unsigned char* N; + unsigned char w; + unsigned long long xlen; + const unsigned char* mauth; + unsigned char* p1; + unsigned char* p2; + + skinny_ctrl l_skinny_ctrl; + l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc123_12; + + (void)nsec; + mauth = m; + + N = npub; + + xlen = clen-16; + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) = 0; + *(uint64_t*)(&s[8]) = 0; + +#else + + *(uint32_t*)(&s[0]) = 0; + *(uint32_t*)(&s[4]) = 0; + *(uint32_t*)(&s[8]) = 0; + *(uint32_t*)(&s[12]) = 0; + +#endif + + reset_lfsr_gf56(CNT); + + w = 48; + + if (adlen == 0) { + w = w ^ 2; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else if (adlen%(32) == 0) { + w = w ^ 8; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else if (adlen%(32) < 16) { + w = w ^ 2; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else if (adlen%(32) == 16) { + w = w ^ 0; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else { + w = w ^ 10; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + + if (adlen == 0) { // AD is an empty string + lfsr_gf56(CNT); + } + else while (adlen > 0) { + adlen = ad_encryption(&ad,s,k,adlen,CNT,40,&l_skinny_ctrl); + } + + if ((w & 8) == 0) { + xlen = ad2msg_encryption (&mauth,CNT,s,k,44,xlen,&l_skinny_ctrl); + } + else if (clen == 0) { + lfsr_gf56(CNT); + } + while (xlen > 0) { + xlen = ad_encryption(&mauth,s,k,xlen,CNT,44,&l_skinny_ctrl); + } + nonce_encryption(N,CNT,s,k,w,&l_skinny_ctrl); + + // Tag generation + g8A(s, T); + + l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc1_1; + + reset_lfsr_gf56(CNT); + + p1 = T; + p2 = (unsigned char*)&c[clen - 16]; + + p1[0] = p2[0]; + p1[1] = p2[1]; + p1[2] = p2[2]; + p1[3] = p2[3]; + p1[4] = p2[4]; + p1[5] = p2[5]; + p1[6] = p2[6]; + p1[7] = p2[7]; + p1[8] = p2[8]; + p1[9] = p2[9]; + p1[10] = p2[10]; + p1[11] = p2[11]; + p1[12] = p2[12]; + p1[13] = p2[13]; + p1[14] = p2[14]; + p1[15] = p2[15]; + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) = *(uint64_t*)(&T[0]); + *(uint64_t*)(&s[8]) 
= *(uint64_t*)(&T[8]); + +#else + + *(uint32_t*)(&s[0]) = *(uint32_t*)(&T[0]); + *(uint32_t*)(&s[4]) = *(uint32_t*)(&T[4]); + *(uint32_t*)(&s[8]) = *(uint32_t*)(&T[8]); + *(uint32_t*)(&s[12]) = *(uint32_t*)(&T[12]); + +#endif + + clen = clen - 16; + *mlen = clen; + + if (clen > 0) { + nonce_encryption(N,CNT,s,k,36,&l_skinny_ctrl); + + l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc1_1; + + while (clen > 16) { + clen = msg_decryption(&m,&c,N,CNT,s,k,36,clen,&l_skinny_ctrl); + } + irho_ud16(m, c, s, clen); + c = c + clen; + m = m + clen; + } + + for (int i = 0; i < 16; i++) { + if (T[i] != (*(c+i))) { + return -1; + } + } + + return 0; + +} diff --git a/romulus/Implementations/crypto_aead/romulusm1+/opt32a_NEC/skinny.h b/romulus/Implementations/crypto_aead/romulusm1+/opt32a_NEC/skinny.h new file mode 100644 index 0000000..c8e7b56 --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusm1+/opt32a_NEC/skinny.h @@ -0,0 +1,106 @@ +#define ___SKINNY_LOOP +//#define ___NUM_OF_ROUNDS_56 +#if (defined(__riscv_xlen) && (__riscv_xlen == 64)) +#define ___ENABLE_DWORD_CAST +#endif + +#include + +typedef struct ___skinny_ctrl { +#ifdef ___NUM_OF_ROUNDS_56 + unsigned char roundKeys[960]; // number of rounds : 56 +#else + unsigned char roundKeys[704]; // number of rounds : 40 +#endif + void (*func_skinny_128_384_enc)(unsigned char*, struct ___skinny_ctrl*, unsigned char* CNT, unsigned char* T, const unsigned char* K); +} skinny_ctrl; + +extern void skinny_128_384_enc123_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K); +extern void skinny_128_384_enc12_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K); +extern void skinny_128_384_enc1_1 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K); + +#define pack_word(x0, x1, x2, x3, w) \ + w = ((x3) << 24) ^ \ + ((x2) << 16) ^ \ + ((x1) << 8) ^ \ + (x0); + +#define unpack_word(x0, x1, x2, x3, w) \ + x0 = ((w) & 0xff); \ + x1 = (((w) >> 8) & 0xff); \ + x2 = (((w) >> 16) & 0xff); \ + x3 = ((w) >> 24); + +#ifdef ___ENABLE_DWORD_CAST + +#define PERMUTATION() \ +/* permutation */ \ + \ + /* 7 6 5 4 3 2 1 0 */ \ + /* 5 7 2 3 6 0 4 1 */ \ + \ + /* dw (7 6 5 4 3 2 1 0) */ \ + \ + /* dw (5 7 2 3 6 0 4 1) */ \ + \ + dt0 = dw >> 24; /* - - - 7 6 5 4 3 */ \ + dt0 = dt0 & 0x00000000ff00ff00; /* - - - - 6 - 4 - */ \ + \ + dt1 = dw << 16; /* 5 4 3 2 1 0 - - */ \ + dt1 = dt1 & 0xff00000000ff0000; /* 5 - - - - 0 - - */ \ + dt0 = dt0 ^ dt1; /* 5 - - - 6 0 4 - */ \ + \ + dt1 = dw >> 8; /* - 7 6 5 4 3 2 1 */ \ + dt1 = dt1 & 0x00ff0000000000ff; /* - 7 - - - - - 1 */ \ + dt0 = dt0 ^ dt1; /* 5 7 - - 6 0 4 1 */ \ + \ + dt1 = dw << 8; /* 6 5 4 3 2 1 0 - */ \ + dt1 = dt1 & 0x000000ff00000000; /* - - - 3 - - - - */ \ + dt0 = dt0 ^ dt1; /* 5 7 - 3 6 0 4 1 */ \ + \ + dt1 = dw << 24; /* 4 3 2 1 0 - - - */ \ + dw = dt1 & 0x0000ff0000000000; /* - - 2 - - - - - */ \ + dw = dw ^ dt0; /* 5 7 2 3 6 0 4 1 */ + +#else + +#define PERMUTATION() \ +/* permutation */ \ + \ + /* 7 6 5 4 3 2 1 0 */ \ + /* 5 7 2 3 6 0 4 1 */ \ + \ + /* w0 (3 2 1 0) */ \ + /* w1 (7 6 5 4) */ \ + \ + /* w0 (6 0 4 1) */ \ + /* w1 (5 7 2 3) */ \ + \ + t0 = w1 << 8; /* 6 5 4 - */ \ + t0 = t0 & 0xff00ff00; /* 6 - 4 - */ \ + \ + t1 = w1 << 16; /* 5 4 - - */ \ + t1 = t1 & 0xff000000; /* 5 - - - */ \ + \ + t2 = w1 & 0xff000000; /* 7 - - - */ \ + t2 = t2 >> 8; /* - 7 - - */ \ + t1 = t1 ^ t2; /* 5 7 - - */ \ + \ + t2 = w0 & 
0xff000000; /* 3 - - - */ \ + t2 = t2 >> 24; /* - - - 3 */ \ + t1 = t1 ^ t2; /* 5 7 - 3 */ \ + \ + w1 = w0 >> 8; /* - 3 2 1 */ \ + w1 = w1 & 0x0000ff00; /* - - 2 - */ \ + w1 = w1 ^ t1; /* 5 7 2 3 */ \ + \ + t2 = w0 & 0x0000ff00; /* - - 1 - */ \ + t2 = t2 >> 8; /* - - - 1 */ \ + t0 = t0 ^ t2; /* 6 - 4 1 */ \ + \ + w0 = w0 << 16; /* 1 0 - - */ \ + w0 = w0 & 0x00ff0000; /* - 0 - - */ \ + w0 = w0 ^ t0; /* 6 0 4 1 */ + +#endif + diff --git a/romulus/Implementations/crypto_aead/romulusm1+/opt32a_NEC/skinny_key_schedule2.c b/romulus/Implementations/crypto_aead/romulusm1+/opt32a_NEC/skinny_key_schedule2.c new file mode 100644 index 0000000..c2f30de --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusm1+/opt32a_NEC/skinny_key_schedule2.c @@ -0,0 +1,431 @@ +/****************************************************************************** + * Copyright (c) 2020, NEC Corporation. + * + * THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND. + * + *****************************************************************************/ + +/* + * SKINNY-128-384 + * + * load * AC(c0 c1) ^ TK3 + * calc AC(c0 c1) ^ TK2 -> store + * ART(TK2) + * + * number of rounds : 40 or 56 + */ + +#include "skinny.h" + +#ifdef ___ENABLE_DWORD_CAST + +#define PERMUTATION_TK2() \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK2) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x6 x5 x4 x3 x2 x1 x0 x7^x5) */ \ + dw = ((dw << 1) & 0xfefefefefefefefe) ^ \ + (((dw >> 7) ^ (dw >> 5)) & 0x0101010101010101); \ + \ + /* Load TK3 */ \ + /* TK2^TK3^AC(c0 c1) */ \ + /* store */ \ + *tk2 = dw ^ *tk3; \ + tk2 += 2; \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys) +{ + uint64_t* tk2; // used in MACRO + uint64_t* tk3; // used in MACRO + uint64_t dt0; // used in MACRO + uint64_t dt1; // used in MACRO + uint64_t dw; + + // odd + + // load master key + // load master key + dw = *(uint64_t*)&roundKeys[16]; + + tk2 = (uint64_t*)&roundKeys[64]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[384]; +#else + tk3 = (uint64_t*)&roundKeys[512]; +#endif + + // 1st round + *tk2 = dw ^ *tk3; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... ,37th,39th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41th,43th, ... ,51th,53th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + + // even + + // load master key + dw = *(uint64_t*)&roundKeys[24]; + + tk2 = (uint64_t*)&roundKeys[72]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[392]; +#else + tk3 = (uint64_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... 
,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... ,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys) +{ + uint64_t* tk2; // used in MACRO + uint64_t* tk3; // used in MACRO + uint64_t dt0; // used in MACRO + uint64_t dt1; // used in MACRO + uint64_t dw; + + // odd + + // load master key + dw = *(uint64_t*)&roundKeys[16]; + + tk2 = (uint64_t*)&roundKeys[64]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[384]; +#else + tk3 = (uint64_t*)&roundKeys[512]; +#endif + + // 1st round + *tk2 = dw ^ *tk3; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + PERMUTATION_TK2(); + } + + // even + + // load master key + dw = *(uint64_t*)&roundKeys[24]; + + tk2 = (uint64_t*)&roundKeys[72]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[392]; +#else + tk3 = (uint64_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + PERMUTATION_TK2(); + } + +} + +#endif /* ___SKINNY_LOOP */ + +#else /* ___ENABLE_DWORD_CAST */ + +#define PERMUTATION_TK2() \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK2) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x6 x5 x4 x3 x2 x1 x0 x7^x5) */ \ + w0 = ((w0 << 1) & 0xfefefefe) ^ \ + (((w0 >> 7) ^ (w0 >> 5)) & 0x01010101); \ + w1 = ((w1 << 1) & 0xfefefefe) ^ \ + (((w1 >> 7) ^ (w1 >> 5)) & 0x01010101); \ + \ + /* Load TK3 */ \ + /* TK2^TK3^AC(c0 c1) */ \ + /* store */ \ + *tk2++ = w0 ^ *tk3++; \ + *tk2++ = w1 ^ *tk3++; \ + tk2 += 2; \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys) +{ + uint32_t* tk2; // used in MACRO + uint32_t* tk3; // used in MACRO + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + + // odd + + // load master key + w0 = *(uint32_t*)&roundKeys[16]; + w1 = *(uint32_t*)&roundKeys[20]; + + tk2 = (uint32_t*)&roundKeys[64]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[384]; +#else + tk3 = (uint32_t*)&roundKeys[512]; +#endif + + // 1st round + *tk2++ = w0 ^ *tk3++; + *tk2++ = w1 ^ *tk3++; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... ,37th,39th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41th,43th, ... 
,51th,53th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + + // even + + // load master key + w0 = *(uint32_t*)&roundKeys[24]; + w1 = *(uint32_t*)&roundKeys[28]; + + tk2 = (uint32_t*)&roundKeys[72]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[392]; +#else + tk3 = (uint32_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... ,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... ,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys) +{ + uint32_t* tk2; // used in MACRO + uint32_t* tk3; // used in MACRO + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + + // odd + + // load master key + w0 = *(uint32_t*)&roundKeys[16]; + w1 = *(uint32_t*)&roundKeys[20]; + + tk2 = (uint32_t*)&roundKeys[64]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[384]; +#else + tk3 = (uint32_t*)&roundKeys[512]; +#endif + + // 1st round + *tk2++ = w0 ^ *tk3++; + *tk2++ = w1 ^ *tk3++; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + PERMUTATION_TK2(); + } + + // even + + // load master key + w0 = *(uint32_t*)&roundKeys[24]; + w1 = *(uint32_t*)&roundKeys[28]; + + tk2 = (uint32_t*)&roundKeys[72]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[392]; +#else + tk3 = (uint32_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + PERMUTATION_TK2(); + } + +} + +#endif /* ___SKINNY_LOOP */ + +#endif /* ___ENABLE_DWORD_CAST */ + diff --git a/romulus/Implementations/crypto_aead/romulusm1+/opt32a_NEC/skinny_key_schedule3.c b/romulus/Implementations/crypto_aead/romulusm1+/opt32a_NEC/skinny_key_schedule3.c new file mode 100644 index 0000000..5dcaf7f --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusm1+/opt32a_NEC/skinny_key_schedule3.c @@ -0,0 +1,428 @@ +/****************************************************************************** + * Copyright (c) 2020, NEC Corporation. + * + * THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND. 
+ * + *****************************************************************************/ + +/* + * SKINNY-128-384 + * + * AC(c0 c1) ^ TK3 -> store + * ART(TK3) + * + * number of rounds : 40 or 56 + */ + +#include "skinny.h" + +#ifdef ___ENABLE_DWORD_CAST + +#define PERMUTATION_TK3(c0Val, c1Val) \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK3) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x0^x6 x7 x6 x5 x4 x3 x2 x1) */ \ + dw = ((dw >> 1) & 0x7f7f7f7f7f7f7f7f) ^ \ + (((dw << 7) ^ (dw << 1)) & 0x8080808080808080); \ + \ + /* K3^AC(c0 c1) */ \ + /* store */ \ + dt0 = dw ^ c0Val; \ + *tk3 = dt0 ^ ((uint64_t)c1Val << 40); \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys) +{ + uint64_t *tk3; + uint64_t dt0; // used in MACRO + uint64_t dt1; // used in MACRO + uint64_t dw; + + // odd + + // load master key + dw = *(uint64_t*)&roundKeys[32]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[384]; +#else + tk3 = (uint64_t*)&roundKeys[512]; +#endif + + // 1st round + *tk3++ = dw ^ 0x01; + tk3 += 1; + + // 3rd,5th, ... ,37th,39th round + PERMUTATION_TK3(0x7, 0x0); + PERMUTATION_TK3(0xf, 0x1); + PERMUTATION_TK3(0xd, 0x3); + PERMUTATION_TK3(0x7, 0x3); + PERMUTATION_TK3(0xe, 0x1); + PERMUTATION_TK3(0x9, 0x3); + PERMUTATION_TK3(0x7, 0x2); + PERMUTATION_TK3(0xd, 0x1); + PERMUTATION_TK3(0x5, 0x3); + + PERMUTATION_TK3(0x6, 0x1); + PERMUTATION_TK3(0x8, 0x1); + PERMUTATION_TK3(0x1, 0x2); + PERMUTATION_TK3(0x5, 0x0); + PERMUTATION_TK3(0x7, 0x1); + PERMUTATION_TK3(0xc, 0x1); + PERMUTATION_TK3(0x1, 0x3); + PERMUTATION_TK3(0x6, 0x0); + PERMUTATION_TK3(0xb, 0x1); + PERMUTATION_TK3(0xd, 0x2); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41td,43th, ... ,53th,55th round + PERMUTATION_TK3(0x4, 0x3); + PERMUTATION_TK3(0x2, 0x1); + PERMUTATION_TK3(0x8, 0x0); + PERMUTATION_TK3(0x2, 0x2); + PERMUTATION_TK3(0x9, 0x0); + PERMUTATION_TK3(0x6, 0x2); + PERMUTATION_TK3(0x9, 0x1); + PERMUTATION_TK3(0x5, 0x2); + +#endif + + // even + + // load master key + dw = *(uint64_t*)&roundKeys[40]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[392]; +#else + tk3 = (uint64_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... ,38th,40th round + PERMUTATION_TK3(0x3, 0x0); + PERMUTATION_TK3(0xf, 0x0); + PERMUTATION_TK3(0xe, 0x3); + PERMUTATION_TK3(0xb, 0x3); + PERMUTATION_TK3(0xf, 0x2); + PERMUTATION_TK3(0xc, 0x3); + PERMUTATION_TK3(0x3, 0x3); + PERMUTATION_TK3(0xe, 0x0); + PERMUTATION_TK3(0xa, 0x3); + PERMUTATION_TK3(0xb, 0x2); + + PERMUTATION_TK3(0xc, 0x2); + PERMUTATION_TK3(0x0, 0x3); + PERMUTATION_TK3(0x2, 0x0); + PERMUTATION_TK3(0xb, 0x0); + PERMUTATION_TK3(0xe, 0x2); + PERMUTATION_TK3(0x8, 0x3); + PERMUTATION_TK3(0x3, 0x2); + PERMUTATION_TK3(0xd, 0x0); + PERMUTATION_TK3(0x6, 0x3); + PERMUTATION_TK3(0xa, 0x1); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... 
,54th,56th round + PERMUTATION_TK3(0x9, 0x2); + PERMUTATION_TK3(0x4, 0x2); + PERMUTATION_TK3(0x1, 0x1); + PERMUTATION_TK3(0x4, 0x0); + PERMUTATION_TK3(0x3, 0x1); + PERMUTATION_TK3(0xc, 0x0); + PERMUTATION_TK3(0x2, 0x3); + PERMUTATION_TK3(0xa, 0x0); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys, unsigned char *pRC) +{ + uint64_t *tk3; + uint64_t dt0; // used in MACRO + uint64_t dt1; // used in MACRO + uint64_t dw; + uint64_t c0; + uint64_t c1; + + // odd + + // load master key + dw = *(uint64_t*)&roundKeys[32]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[384]; +#else + tk3 = (uint64_t*)&roundKeys[512]; +#endif + + // 1st round + *tk3++ = dw ^ 0x01; + tk3 += 1; + + pRC += 4; + // 3rd,5th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + + // even + + // load master key + dw = *(uint64_t*)&roundKeys[40]; + +#ifndef ___NUM_OF_ROUNDS_56 + pRC -= 78; + tk3 = (uint64_t*)&roundKeys[392]; +#else + pRC -= 110; + tk3 = (uint64_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + +} + +#endif /* ___SKINNY_LOOP */ + +#else /* ___ENABLE_DWORD_CAST */ + +#define PERMUTATION_TK3(c0Val, c1Val) \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK3) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x0^x6 x7 x6 x5 x4 x3 x2 x1) */ \ + w0 = ((w0 >> 1) & 0x7f7f7f7f) ^ \ + (((w0 << 7) ^ (w0 << 1)) & 0x80808080); \ + w1 = ((w1 >> 1) & 0x7f7f7f7f) ^ \ + (((w1 << 7) ^ (w1 << 1)) & 0x80808080); \ + \ + /* K3^AC(c0 c1) */ \ + /* store */ \ + *tk3++ = w0 ^ c0Val; \ + *tk3++ = w1 ^ ((uint32_t)c1Val << 8); \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys) +{ + uint32_t *tk3; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + + // odd + + // load master key + w0 = *(uint32_t*)&roundKeys[32]; + w1 = *(uint32_t*)&roundKeys[36]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[384]; +#else + tk3 = (uint32_t*)&roundKeys[512]; +#endif + + // 1st round + *tk3++ = w0 ^ 0x01; + *tk3++ = w1; + tk3 += 2; + + // 3rd,5th, ... ,37th,39th round + PERMUTATION_TK3(0x7, 0x0); + PERMUTATION_TK3(0xf, 0x1); + PERMUTATION_TK3(0xd, 0x3); + PERMUTATION_TK3(0x7, 0x3); + PERMUTATION_TK3(0xe, 0x1); + PERMUTATION_TK3(0x9, 0x3); + PERMUTATION_TK3(0x7, 0x2); + PERMUTATION_TK3(0xd, 0x1); + PERMUTATION_TK3(0x5, 0x3); + + PERMUTATION_TK3(0x6, 0x1); + PERMUTATION_TK3(0x8, 0x1); + PERMUTATION_TK3(0x1, 0x2); + PERMUTATION_TK3(0x5, 0x0); + PERMUTATION_TK3(0x7, 0x1); + PERMUTATION_TK3(0xc, 0x1); + PERMUTATION_TK3(0x1, 0x3); + PERMUTATION_TK3(0x6, 0x0); + PERMUTATION_TK3(0xb, 0x1); + PERMUTATION_TK3(0xd, 0x2); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41td,43th, ... ,53th,55th round + PERMUTATION_TK3(0x4, 0x3); + PERMUTATION_TK3(0x2, 0x1); + PERMUTATION_TK3(0x8, 0x0); + PERMUTATION_TK3(0x2, 0x2); + PERMUTATION_TK3(0x9, 0x0); + PERMUTATION_TK3(0x6, 0x2); + PERMUTATION_TK3(0x9, 0x1); + PERMUTATION_TK3(0x5, 0x2); + +#endif + + // even + + // load master key + w0 = *(uint32_t*)&roundKeys[40]; + w1 = *(uint32_t*)&roundKeys[44]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[392]; +#else + tk3 = (uint32_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... 
,38th,40th round + PERMUTATION_TK3(0x3, 0x0); + PERMUTATION_TK3(0xf, 0x0); + PERMUTATION_TK3(0xe, 0x3); + PERMUTATION_TK3(0xb, 0x3); + PERMUTATION_TK3(0xf, 0x2); + PERMUTATION_TK3(0xc, 0x3); + PERMUTATION_TK3(0x3, 0x3); + PERMUTATION_TK3(0xe, 0x0); + PERMUTATION_TK3(0xa, 0x3); + PERMUTATION_TK3(0xb, 0x2); + + PERMUTATION_TK3(0xc, 0x2); + PERMUTATION_TK3(0x0, 0x3); + PERMUTATION_TK3(0x2, 0x0); + PERMUTATION_TK3(0xb, 0x0); + PERMUTATION_TK3(0xe, 0x2); + PERMUTATION_TK3(0x8, 0x3); + PERMUTATION_TK3(0x3, 0x2); + PERMUTATION_TK3(0xd, 0x0); + PERMUTATION_TK3(0x6, 0x3); + PERMUTATION_TK3(0xa, 0x1); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... ,54th,56th round + PERMUTATION_TK3(0x9, 0x2); + PERMUTATION_TK3(0x4, 0x2); + PERMUTATION_TK3(0x1, 0x1); + PERMUTATION_TK3(0x4, 0x0); + PERMUTATION_TK3(0x3, 0x1); + PERMUTATION_TK3(0xc, 0x0); + PERMUTATION_TK3(0x2, 0x3); + PERMUTATION_TK3(0xa, 0x0); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys, unsigned char *pRC) +{ + uint32_t *tk3; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + uint32_t c0; + uint32_t c1; + + // odd + + // load master key + w0 = *(uint32_t*)&roundKeys[32]; + w1 = *(uint32_t*)&roundKeys[36]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[384]; +#else + tk3 = (uint32_t*)&roundKeys[512]; +#endif + + // 1st round + *tk3++ = w0 ^ 0x01; + *tk3++ = w1; + tk3 += 2; + + pRC += 4; + // 3rd,5th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + + // even + + // load master key + w0 = *(uint32_t*)&roundKeys[40]; + w1 = *(uint32_t*)&roundKeys[44]; + +#ifndef ___NUM_OF_ROUNDS_56 + pRC -= 78; + tk3 = (uint32_t*)&roundKeys[392]; +#else + pRC -= 110; + tk3 = (uint32_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + +} + +#endif /* ___SKINNY_LOOP */ + +#endif /* ___ENABLE_DWORD_CAST */ + diff --git a/romulus/Implementations/crypto_aead/romulusm1+/opt32a_NEC/skinny_main.c b/romulus/Implementations/crypto_aead/romulusm1+/opt32a_NEC/skinny_main.c new file mode 100644 index 0000000..8a6e75f --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusm1+/opt32a_NEC/skinny_main.c @@ -0,0 +1,675 @@ +/****************************************************************************** + * Copyright (c) 2020, NEC Corporation. + * + * THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND. 
+ * + *****************************************************************************/ + +/* + * SKINNY-128-384 + * + * ART(TK1) -> store + * load AC(c0 c1) ^ TK3 ^ TK2 + * load TK1 + * calc AC(c0 c1) ^ TK3 ^ TK2 ^ TK1 -> use at (AC->ART) + * SC->SR->(AC->ART)->MC + * + * number of rounds : 40 or 56 + */ + +#include "skinny.h" + +/* + * S-BOX + */ +unsigned char SBOX[] += { + // Original + 0x65, 0x4c, 0x6a, 0x42, 0x4b, 0x63, 0x43, 0x6b, 0x55, 0x75, 0x5a, 0x7a, 0x53, 0x73, 0x5b, 0x7b, + 0x35, 0x8c, 0x3a, 0x81, 0x89, 0x33, 0x80, 0x3b, 0x95, 0x25, 0x98, 0x2a, 0x90, 0x23, 0x99, 0x2b, + 0xe5, 0xcc, 0xe8, 0xc1, 0xc9, 0xe0, 0xc0, 0xe9, 0xd5, 0xf5, 0xd8, 0xf8, 0xd0, 0xf0, 0xd9, 0xf9, + 0xa5, 0x1c, 0xa8, 0x12, 0x1b, 0xa0, 0x13, 0xa9, 0x05, 0xb5, 0x0a, 0xb8, 0x03, 0xb0, 0x0b, 0xb9, + 0x32, 0x88, 0x3c, 0x85, 0x8d, 0x34, 0x84, 0x3d, 0x91, 0x22, 0x9c, 0x2c, 0x94, 0x24, 0x9d, 0x2d, + 0x62, 0x4a, 0x6c, 0x45, 0x4d, 0x64, 0x44, 0x6d, 0x52, 0x72, 0x5c, 0x7c, 0x54, 0x74, 0x5d, 0x7d, + 0xa1, 0x1a, 0xac, 0x15, 0x1d, 0xa4, 0x14, 0xad, 0x02, 0xb1, 0x0c, 0xbc, 0x04, 0xb4, 0x0d, 0xbd, + 0xe1, 0xc8, 0xec, 0xc5, 0xcd, 0xe4, 0xc4, 0xed, 0xd1, 0xf1, 0xdc, 0xfc, 0xd4, 0xf4, 0xdd, 0xfd, + 0x36, 0x8e, 0x38, 0x82, 0x8b, 0x30, 0x83, 0x39, 0x96, 0x26, 0x9a, 0x28, 0x93, 0x20, 0x9b, 0x29, + 0x66, 0x4e, 0x68, 0x41, 0x49, 0x60, 0x40, 0x69, 0x56, 0x76, 0x58, 0x78, 0x50, 0x70, 0x59, 0x79, + 0xa6, 0x1e, 0xaa, 0x11, 0x19, 0xa3, 0x10, 0xab, 0x06, 0xb6, 0x08, 0xba, 0x00, 0xb3, 0x09, 0xbb, + 0xe6, 0xce, 0xea, 0xc2, 0xcb, 0xe3, 0xc3, 0xeb, 0xd6, 0xf6, 0xda, 0xfa, 0xd3, 0xf3, 0xdb, 0xfb, + 0x31, 0x8a, 0x3e, 0x86, 0x8f, 0x37, 0x87, 0x3f, 0x92, 0x21, 0x9e, 0x2e, 0x97, 0x27, 0x9f, 0x2f, + 0x61, 0x48, 0x6e, 0x46, 0x4f, 0x67, 0x47, 0x6f, 0x51, 0x71, 0x5e, 0x7e, 0x57, 0x77, 0x5f, 0x7f, + 0xa2, 0x18, 0xae, 0x16, 0x1f, 0xa7, 0x17, 0xaf, 0x01, 0xb2, 0x0e, 0xbe, 0x07, 0xb7, 0x0f, 0xbf, + 0xe2, 0xca, 0xee, 0xc6, 0xcf, 0xe7, 0xc7, 0xef, 0xd2, 0xf2, 0xde, 0xfe, 0xd7, 0xf7, 0xdf, 0xff, +}; + + /* + * S-BOX ^ AC(c2) + */ +unsigned char SBOX2[] += { // Original ^ c2(0x02) + 0x67, 0x4e, 0x68, 0x40, 0x49, 0x61, 0x41, 0x69, 0x57, 0x77, 0x58, 0x78, 0x51, 0x71, 0x59, 0x79, + 0x37, 0x8e, 0x38, 0x83, 0x8b, 0x31, 0x82, 0x39, 0x97, 0x27, 0x9a, 0x28, 0x92, 0x21, 0x9b, 0x29, + 0xe7, 0xce, 0xea, 0xc3, 0xcb, 0xe2, 0xc2, 0xeb, 0xd7, 0xf7, 0xda, 0xfa, 0xd2, 0xf2, 0xdb, 0xfb, + 0xa7, 0x1e, 0xaa, 0x10, 0x19, 0xa2, 0x11, 0xab, 0x07, 0xb7, 0x08, 0xba, 0x01, 0xb2, 0x09, 0xbb, + 0x30, 0x8a, 0x3e, 0x87, 0x8f, 0x36, 0x86, 0x3f, 0x93, 0x20, 0x9e, 0x2e, 0x96, 0x26, 0x9f, 0x2f, + 0x60, 0x48, 0x6e, 0x47, 0x4f, 0x66, 0x46, 0x6f, 0x50, 0x70, 0x5e, 0x7e, 0x56, 0x76, 0x5f, 0x7f, + 0xa3, 0x18, 0xae, 0x17, 0x1f, 0xa6, 0x16, 0xaf, 0x00, 0xb3, 0x0e, 0xbe, 0x06, 0xb6, 0x0f, 0xbf, + 0xe3, 0xca, 0xee, 0xc7, 0xcf, 0xe6, 0xc6, 0xef, 0xd3, 0xf3, 0xde, 0xfe, 0xd6, 0xf6, 0xdf, 0xff, + 0x34, 0x8c, 0x3a, 0x80, 0x89, 0x32, 0x81, 0x3b, 0x94, 0x24, 0x98, 0x2a, 0x91, 0x22, 0x99, 0x2b, + 0x64, 0x4c, 0x6a, 0x43, 0x4b, 0x62, 0x42, 0x6b, 0x54, 0x74, 0x5a, 0x7a, 0x52, 0x72, 0x5b, 0x7b, + 0xa4, 0x1c, 0xa8, 0x13, 0x1b, 0xa1, 0x12, 0xa9, 0x04, 0xb4, 0x0a, 0xb8, 0x02, 0xb1, 0x0b, 0xb9, + 0xe4, 0xcc, 0xe8, 0xc0, 0xc9, 0xe1, 0xc1, 0xe9, 0xd4, 0xf4, 0xd8, 0xf8, 0xd1, 0xf1, 0xd9, 0xf9, + 0x33, 0x88, 0x3c, 0x84, 0x8d, 0x35, 0x85, 0x3d, 0x90, 0x23, 0x9c, 0x2c, 0x95, 0x25, 0x9d, 0x2d, + 0x63, 0x4a, 0x6c, 0x44, 0x4d, 0x65, 0x45, 0x6d, 0x53, 0x73, 0x5c, 0x7c, 0x55, 0x75, 0x5d, 0x7d, + 0xa0, 0x1a, 0xac, 0x14, 0x1d, 0xa5, 0x15, 0xad, 0x03, 0xb0, 0x0c, 0xbc, 0x05, 0xb5, 0x0d, 0xbd, + 0xe0, 0xc8, 0xec, 0xc4, 0xcd, 0xe5, 
0xc5, 0xed, 0xd0, 0xf0, 0xdc, 0xfc, 0xd5, 0xf5, 0xdd, 0xfd, +}; + +#ifdef ___SKINNY_LOOP +/* + * Round Constants + */ +unsigned char RC[] += { + 0x01, 0x00, 0x03, 0x00, 0x07, 0x00, 0x0f, 0x00, 0x0f, 0x01, 0x0e, 0x03, 0x0d, 0x03, 0x0b, 0x03, + 0x07, 0x03, 0x0f, 0x02, 0x0e, 0x01, 0x0c, 0x03, 0x09, 0x03, 0x03, 0x03, 0x07, 0x02, 0x0e, 0x00, + 0x0d, 0x01, 0x0a, 0x03, 0x05, 0x03, 0x0b, 0x02, 0x06, 0x01, 0x0c, 0x02, 0x08, 0x01, 0x00, 0x03, + 0x01, 0x02, 0x02, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x07, 0x01, 0x0e, 0x02, 0x0c, 0x01, 0x08, 0x03, + 0x01, 0x03, 0x03, 0x02, 0x06, 0x00, 0x0d, 0x00, 0x0b, 0x01, 0x06, 0x03, 0x0d, 0x02, 0x0a, 0x01, +#ifdef ___NUM_OF_ROUNDS_56 + 0x04, 0x03, 0x09, 0x02, 0x02, 0x01, 0x04, 0x02, 0x08, 0x00, 0x01, 0x01, 0x02, 0x02, 0x04, 0x00, + 0x09, 0x00, 0x03, 0x01, 0x06, 0x02, 0x0c, 0x00, 0x09, 0x01, 0x02, 0x03, 0x05, 0x02, 0x0a, 0x00, +#endif + }; +#endif + +extern void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2); +extern void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys); +#ifdef ___SKINNY_LOOP +extern void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys, unsigned char *pRC); +#else +extern void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys); +#endif + +void skinny_128_384_enc123_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K) +{ + uint32_t *pt = (uint32_t*)&pskinny_ctrl->roundKeys[0]; + + pt[0] = *(uint32_t*)(&CNT[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + pt[4] = *(uint32_t*)(&T[0]); + pack_word(T[7], T[4], T[5], T[6], pt[5]); + pt[6] = *(uint32_t*)(&T[8]); + pack_word(T[15], T[12], T[13], T[14], pt[7]); + + pt[8] = *(uint32_t*)(&K[0]); + pack_word(K[7], K[4], K[5], K[6], pt[9]); + pt[10] = *(uint32_t*)(&K[8]); + pack_word(K[15], K[12], K[13], K[14], pt[11]); + +#ifdef ___SKINNY_LOOP + RunEncryptionKeyScheduleTK3(pskinny_ctrl->roundKeys, RC); +#else + RunEncryptionKeyScheduleTK3(pskinny_ctrl->roundKeys); +#endif + RunEncryptionKeyScheduleTK2(pskinny_ctrl->roundKeys); + Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2); + + pskinny_ctrl->func_skinny_128_384_enc = skinny_128_384_enc12_12; + +} + +void skinny_128_384_enc12_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K) +{ + (void)K; + + uint32_t *pt = &pskinny_ctrl->roundKeys[0]; + + pt[0] = *(uint32_t*)(&CNT[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + pt[4] = *(uint32_t*)(&T[0]); + pack_word(T[7], T[4], T[5], T[6], pt[5]); + pt[6] = *(uint32_t*)(&T[8]); + pack_word(T[15], T[12], T[13], T[14], pt[7]); + + RunEncryptionKeyScheduleTK2(pskinny_ctrl->roundKeys); + Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2); + +} + +extern void skinny_128_384_enc1_1 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K) +{ + (void)T; + (void)K; + + uint32_t *pt = &pskinny_ctrl->roundKeys[0]; + + pt[0] = *(uint32_t*)(&CNT[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2); + +} + +#define PERMUTATION_TK1() \ + \ +/* permutation */ \ +{ \ + unsigned char tmp0 = roundKeys[0]; \ + unsigned char tmp1 = roundKeys[1]; \ + unsigned char tmp2 = roundKeys[2]; \ + unsigned char tmp3 = roundKeys[3]; \ + unsigned char tmp4 = roundKeys[4]; \ + unsigned char tmp5 = roundKeys[5]; \ + unsigned char tmp6 = roundKeys[6]; \ + unsigned char tmp7 = roundKeys[7]; \ + \ + unsigned char* dst = 
&roundKeys[8]; \ + \ + /* 5 7 2 3 6 0 4 1 */ \ + *dst++ = tmp1; \ + *dst++ = tmp4; \ + *dst++ = tmp0; \ + *dst++ = tmp6; \ + *dst++ = tmp3; \ + *dst++ = tmp2; \ + *dst++ = tmp7; \ + *dst++ = tmp5; \ + \ + /* 2 5 0 6 7 1 3 4 */ \ + *dst++ = tmp4; \ + *dst++ = tmp3; \ + *dst++ = tmp1; \ + *dst++ = tmp7; \ + *dst++ = tmp6; \ + *dst++ = tmp0; \ + *dst++ = tmp5; \ + *dst++ = tmp2; \ + \ + /* 0 2 1 7 5 4 6 3 */ \ + *dst++ = tmp3; \ + *dst++ = tmp6; \ + *dst++ = tmp4; \ + *dst++ = tmp5; \ + *dst++ = tmp7; \ + *dst++ = tmp1; \ + *dst++ = tmp2; \ + *dst++ = tmp0; \ + \ + /* 1 0 4 5 2 3 7 6 */ \ + *dst++ = tmp6; \ + *dst++ = tmp7; \ + *dst++ = tmp3; \ + *dst++ = tmp2; \ + *dst++ = tmp5; \ + *dst++ = tmp4; \ + *dst++ = tmp0; \ + *dst++ = tmp1; \ + \ + /* 4 1 3 2 0 6 5 7 */ \ + *dst++ = tmp7; \ + *dst++ = tmp5; \ + *dst++ = tmp6; \ + *dst++ = tmp0; \ + *dst++ = tmp2; \ + *dst++ = tmp3; \ + *dst++ = tmp1; \ + *dst++ = tmp4; \ + \ + /* 3 4 6 0 1 7 2 5 */ \ + *dst++ = tmp5; \ + *dst++ = tmp2; \ + *dst++ = tmp7; \ + *dst++ = tmp1; \ + *dst++ = tmp0; \ + *dst++ = tmp6; \ + *dst++ = tmp4; \ + *dst++ = tmp3; \ + \ + /* 6 3 7 1 4 5 0 2 */ \ + *dst++ = tmp2; \ + *dst++ = tmp0; \ + *dst++ = tmp5; \ + *dst++ = tmp4; \ + *dst++ = tmp1; \ + *dst++ = tmp7; \ + *dst++ = tmp3; \ + *dst++ = tmp6; \ +} + +#define SBOX_0(b0, b1, b2, b3) \ + \ + t0 = sbox[b0]; \ + t1 = sbox[b1]; \ + t2 = sbox[b2]; \ + t3 = sbox[b3]; \ + \ + b0 = (uint8_t)t0; \ + b1 = (uint8_t)t1; \ + b2 = (uint8_t)t2; \ + b3 = (uint8_t)t3; + +#define SBOX_8(b0, b1, b2, b3) \ + \ + t0 = sbox[b0]; \ + t1 = sbox[b1]; \ + t2 = sbox[b2]; \ + t3 = sbox[b3]; \ + \ + b0 = (uint8_t)t3; \ + b1 = (uint8_t)t0; \ + b2 = (uint8_t)t1; \ + b3 = (uint8_t)t2; + +#define SBOX_16(b0, b1, b2, b3) \ + \ + t0 = sbox2[b0]; /* AC(c2) */ \ + t1 = sbox[b1]; \ + t2 = sbox[b2]; \ + t3 = sbox[b3]; \ + \ + b0 = (uint8_t)t2; \ + b1 = (uint8_t)t3; \ + b2 = (uint8_t)t0; \ + b3 = (uint8_t)t1; + +#define SBOX_24(b0, b1, b2, b3) \ + \ + t0 = sbox[b0]; \ + t1 = sbox[b1]; \ + t2 = sbox[b2]; \ + t3 = sbox[b3]; \ + \ + b0 = (uint8_t)t1; \ + b1 = (uint8_t)t2; \ + b2 = (uint8_t)t3; \ + b3 = (uint8_t)t0; + +#ifdef ___ENABLE_DWORD_CAST + +#define SKINNY_MAIN() \ +{ \ + \ + /* odd */ \ + \ + /* LUT(with ShiftRows & AC(c2))*/ \ + \ + SBOX_0( block[0], block[1], block[2], block[3]); \ + SBOX_8( block[4], block[5], block[6], block[7]); \ + SBOX_16(block[8], block[9], block[10], block[11]); \ + SBOX_24(block[12], block[13], block[14], block[15]); \ + \ + /* TK1^TK2^TK3^AC(c0 c1) */ \ + \ + t1 = *(uint64_t*)&block[0]; \ + t1 ^= *tk1++; \ + t1 ^= *tk2++; \ + \ + /* MC */ \ + \ + t2 = *(uint64_t*)&block[8]; \ + t0 = t2 >> 32; \ + \ + /* 0^2 */ \ + t3 = t1 ^ t2; \ + \ + /* 1^2 */ \ + t2 = (t1 >> 32) ^ t2; \ + \ + /* 0^2^3 */ \ + t0 = t0 ^ t3; \ + \ + *(uint32_t*)&block[0] = (uint32_t)t0; \ + *(uint32_t*)&block[4] = (uint32_t)t1; \ + *(uint32_t*)&block[8] = (uint32_t)t2; \ + *(uint32_t*)&block[12] = (uint32_t)t3; \ + \ + /* even */ \ + \ + /* LUT(with ShiftRows & AC(c2))*/ \ + \ + SBOX_0( block[0], block[1], block[2], block[3]); \ + SBOX_8( block[4], block[5], block[6], block[7]); \ + SBOX_16(block[8], block[9], block[10], block[11]); \ + SBOX_24(block[12], block[13], block[14], block[15]); \ + \ + /* TK2^TK3^AC(c0 c1) */ \ + \ + t1 = *(uint64_t*)&block[0]; \ + t1 ^= *tk2++; \ + \ + /* MC */ \ + \ + t2 = *(uint64_t*)&block[8]; \ + t0 = t2 >> 32; \ + \ + /* 0^2 */ \ + t3 = t1 ^ t2; \ + \ + /* 1^2 */ \ + t2 = (t1 >> 32) ^ t2; \ + \ + /* 0^2^3 */ \ + t0 = t0 ^ t3; \ + \ + *(uint32_t*)&block[0] = (uint32_t)t0; \ 
+ *(uint32_t*)&block[4] = (uint32_t)t1; \ + *(uint32_t*)&block[8] = (uint32_t)t2; \ + *(uint32_t*)&block[12] = (uint32_t)t3; \ +} + +#ifndef ___SKINNY_LOOP + +void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint64_t *tk1; + uint64_t *tk2; + uint64_t t0; // used in MACRO + uint64_t t1; // used in MACRO + uint64_t t2; // used in MACRO + uint64_t t3; // used in MACRO + +// TK1 + + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + + tk2 = (uint64_t*)&roundKeys[64]; + tk1 = (uint64_t*)&roundKeys[0]; + + // 1st, ...,16th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint64_t*)&roundKeys[0]; + + // 17th, ...,32th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint64_t*)&roundKeys[0]; + + // 33th, ...,40th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41th, ...,48th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint64_t*)&roundKeys[0]; + + // 49th, ... ,56th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint64_t *tk1; + uint64_t *tk2; + uint64_t t0; // used in MACRO + uint64_t t1; // used in MACRO + uint64_t t2; // used in MACRO + uint64_t t3; // used in MACRO + +// TK1 + + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + + tk2 = (uint64_t*)&roundKeys[64]; + + // 1st, ... ,32th or 48th round +#ifndef ___NUM_OF_ROUNDS_56 + for(int j=0;j<2;j++) +#else + for(int j=0;j<3;j++) +#endif + { + tk1 = (uint64_t*)&roundKeys[0]; + for(int i=0;i<8;i++) + { + SKINNY_MAIN(); + } + } + + // 33th , ... ,40th or 49th, .... 
,56th round + { + tk1 = (uint64_t*)&roundKeys[0]; + for(int i=0;i<4;i++) + { + SKINNY_MAIN(); + } + } +} + +#endif /* ___SKINNY_LOOP */ + +#else /* ___ENABLE_DWORD_CAST */ + +#define SKINNY_MAIN() \ +{ \ + \ + /* odd */ \ + \ + /* LUT(with ShiftRows & AC(c2))*/ \ + \ + SBOX_0( block[0], block[1], block[2], block[3]); \ + SBOX_8( block[4], block[5], block[6], block[7]); \ + SBOX_16(block[8], block[9], block[10], block[11]); \ + SBOX_24(block[12], block[13], block[14], block[15]); \ + \ + /* TK1^TK2^TK3^AC(c0 c1) */ \ + \ + t1 = *(uint32_t*)&block[0]; \ + t0 = *(uint32_t*)&block[4]; \ + t1 ^= *tk1++; \ + t1 ^= *tk2++; \ + t0 ^= *tk1++; \ + t0 ^= *tk2++; \ + \ + /* MC */ \ + \ + t2 = *(uint32_t*)&block[8]; \ + t4 = *(uint32_t*)&block[12]; \ + \ + /* 0^2 */ \ + t3 = t1 ^ t2; \ + \ + /* 1^2 */ \ + t2 = t0 ^ t2; \ + \ + /* 0^2^3 */ \ + t0 = t3 ^ t4; \ + \ + *(uint32_t*)&block[0] = t0; \ + *(uint32_t*)&block[4] = t1; \ + *(uint32_t*)&block[8] = t2; \ + *(uint32_t*)&block[12] = t3; \ + \ + /* even */ \ + \ + /* LUT(with ShiftRows & AC(c2))*/ \ + \ + SBOX_0( block[0], block[1], block[2], block[3]); \ + SBOX_8( block[4], block[5], block[6], block[7]); \ + SBOX_16(block[8], block[9], block[10], block[11]); \ + SBOX_24(block[12], block[13], block[14], block[15]); \ + \ + /* TK2^TK3^AC(c0 c1) */ \ + \ + t1 = *(uint32_t*)&block[0]; \ + t0 = *(uint32_t*)&block[4]; \ + t1 ^= *tk2++; \ + t0 ^= *tk2++; \ + \ + /* MC */ \ + \ + t2 = *(uint32_t*)&block[8]; \ + t4 = *(uint32_t*)&block[12]; \ + \ + /* 0^2 */ \ + t3 = t1 ^ t2; \ + \ + /* 1^2 */ \ + t2 = t0 ^ t2; \ + \ + /* 0^2^3 */ \ + t0 = t3 ^ t4; \ + \ + *(uint32_t*)&block[0] = t0; \ + *(uint32_t*)&block[4] = t1; \ + *(uint32_t*)&block[8] = t2; \ + *(uint32_t*)&block[12] = t3; \ +} + +#ifndef ___SKINNY_LOOP + +void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint32_t *tk1; + uint32_t *tk2; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t t3; // used in MACRO + uint32_t t4; // used in MACRO + +// TK1 + + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + + tk2 = (uint32_t*)&roundKeys[64]; + tk1 = (uint32_t*)&roundKeys[0]; + + // 1st, ...,16th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint32_t*)&roundKeys[0]; + + // 17th, ...,32th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint32_t*)&roundKeys[0]; + + // 33th, ...,40th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41th, ...,48th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint32_t*)&roundKeys[0]; + + // 49th, ... ,56th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint32_t *tk1; + uint32_t *tk2; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t t3; // used in MACRO + uint32_t t4; // used in MACRO + +// TK1 + + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + + tk2 = (uint32_t*)&roundKeys[64]; + + // 1st, ... 
,32th or 48th round +#ifndef ___NUM_OF_ROUNDS_56 + for(int j=0;j<2;j++) +#else + for(int j=0;j<3;j++) +#endif + { + tk1 = (uint32_t*)&roundKeys[0]; + for(int i=0;i<8;i++) + { + SKINNY_MAIN(); + } + } + + // 33th , ... ,40th or 49th, .... ,56th round + { + tk1 = (uint32_t*)&roundKeys[0]; + for(int i=0;i<4;i++) + { + SKINNY_MAIN(); + } + } +} + +#endif /* ___SKINNY_LOOP */ + +#endif /* ___ENABLE_DWORD_CAST */ + diff --git a/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/api.h b/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/api.h new file mode 100644 index 0000000..a4aa567 --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 diff --git a/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/encrypt.c b/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/encrypt.c new file mode 100644 index 0000000..495399b --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/encrypt.c @@ -0,0 +1,1337 @@ +/* + * Date: 29 November 2018 + * Contact: Thomas Peyrin - thomas.peyrin@gmail.com + * Mustafa Khairallah - mustafam001@e.ntu.edu.sg + */ + +#include "crypto_aead.h" +#include "api.h" +#include "skinny.h" +#include +#include + +void pad (const unsigned char* m, unsigned char* mp, int len8) { + +#ifdef ___ENABLE_DWORD_CAST + + if (0 == len8) { + *(uint64_t*)(&mp[0]) = 0; + *(uint64_t*)(&mp[8]) = 0; + } else if (8 > len8) { + *(uint64_t*)(&mp[0]) = *(uint64_t*)(&m[0]) & (0xffffffffffffffff >> (64 - len8*8)); + *(uint64_t*)(&mp[8]) = 0; + mp[15] = len8; + } else if (8 == len8) { + *(uint64_t*)(&mp[0]) = *(uint64_t*)(&m[0]); + *(uint64_t*)(&mp[8]) = 0; + mp[15] = 8; + } else if (16 > len8) { + *(uint64_t*)(&mp[0]) = *(uint64_t*)(&m[0]); + *(uint64_t*)(&mp[8]) = *(uint64_t*)(&m[8]) & (0xffffffffffffffff >> (128 - len8*8)); + mp[15] = len8; + } else { + *(uint64_t*)(&mp[0]) = *(uint64_t*)(&m[0]); + *(uint64_t*)(&mp[8]) = *(uint64_t*)(&m[8]); + } + +#else + + if (0 == len8) { + *(uint32_t*)(&mp[0]) = 0; + *(uint32_t*)(&mp[4]) = 0; + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + } else if (4 > len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]) & (0xffffffff >> (32 - len8*8)); + *(uint32_t*)(&mp[4]) = 0; + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + mp[15] = len8; + } else if (4 == len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = 0; + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + mp[15] = 4; + } else if (8 > len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]) & (0xffffffff >> (64 - len8*8)); + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + mp[15] = len8; + } else if (8 == len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + mp[15] = 8; + } else if (12 > len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = *(uint32_t*)(&m[8]) & (0xffffffff >> (96 - len8*8)); + *(uint32_t*)(&mp[12]) = 0; + mp[15] = len8; + } else if (12 == len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = *(uint32_t*)(&m[8]); + *(uint32_t*)(&mp[12]) = 0; + mp[15] = 12; + } else if (16 > len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + 
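+ /* 12 < len8 < 16: the remaining full words are copied below, the trailing
+    partial word is masked down to len8 bytes, and len8 is stored in mp[15]
+    as the length marker of the padded block. */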
*(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = *(uint32_t*)(&m[8]); + *(uint32_t*)(&mp[12]) = *(uint32_t*)(&m[12]) & (0xffffffff >> (128 - len8*8)); + mp[15] = len8; + } else { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = *(uint32_t*)(&m[8]); + *(uint32_t*)(&mp[12]) = *(uint32_t*)(&m[12]); + } + +#endif + +} + +void g8A (unsigned char* s, unsigned char* c) { + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t s0 = *(uint64_t*)(&s[0]); + uint64_t s1 = *(uint64_t*)(&s[8]); + + uint64_t c0, c1; + + c0 = ((s0 >> 1) & 0x7f7f7f7f7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x8080808080808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x8080808080808080); + + *(uint64_t*)(&c[0]) = c0; + *(uint64_t*)(&c[8]) = c1; + +#else + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t c0, c1, c2, c3; + + c0 = ((s0 >> 1) & 0x7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x80808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x80808080); + c2 = ((s2 >> 1) & 0x7f7f7f7f) ^ ((s2 ^ (s2 << 7)) & 0x80808080); + c3 = ((s3 >> 1) & 0x7f7f7f7f) ^ ((s3 ^ (s3 << 7)) & 0x80808080); + + *(uint32_t*)(&c[0]) = c0; + *(uint32_t*)(&c[4]) = c1; + *(uint32_t*)(&c[8]) = c2; + *(uint32_t*)(&c[12]) = c3; + +#endif + +} + +void g8A_for_Tag_Generation (unsigned char* s, unsigned char* c) { + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t s0 = *(uint64_t*)(&s[0]); + uint64_t s1 = *(uint64_t*)(&s[8]); + + uint64_t c0, c1; + + c0 = ((s0 >> 1) & 0x7f7f7f7f7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x8080808080808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x8080808080808080); + + // use byte access because of memory alignment. + // c is not always in word(4 byte) alignment. + c[0] = c0 &0xFF; + c[1] = (c0>>8) &0xFF; + c[2] = (c0>>16)&0xFF; + c[3] = (c0>>24)&0xFF; + c[4] = (c0>>32)&0xFF; + c[5] = (c0>>40)&0xFF; + c[6] = (c0>>48)&0xFF; + c[7] = c0>>56; + c[8] = c1 &0xFF; + c[9] = (c1>>8) &0xFF; + c[10] = (c1>>16)&0xFF; + c[11] = (c1>>24)&0xFF; + c[12] = (c1>>32)&0xFF; + c[13] = (c1>>40)&0xFF; + c[14] = (c1>>48)&0xFF; + c[15] = c1>>56; + +#else + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t c0, c1, c2, c3; + + c0 = ((s0 >> 1) & 0x7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x80808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x80808080); + c2 = ((s2 >> 1) & 0x7f7f7f7f) ^ ((s2 ^ (s2 << 7)) & 0x80808080); + c3 = ((s3 >> 1) & 0x7f7f7f7f) ^ ((s3 ^ (s3 << 7)) & 0x80808080); + + // use byte access because of memory alignment. + // c is not always in word(4 byte) alignment. 
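+ // The destination may not be word-aligned (e.g. when it points directly
+ // into the ciphertext buffer at the tag position), so c0..c3 are written
+ // out one byte at a time in little-endian order instead of as 32-bit stores.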
+ c[0] = c0 &0xFF; + c[1] = (c0>>8) &0xFF; + c[2] = (c0>>16)&0xFF; + c[3] = c0>>24; + c[4] = c1 &0xFF; + c[5] = (c1>>8) &0xFF; + c[6] = (c1>>16)&0xFF; + c[7] = c1>>24; + c[8] = c2 &0xFF; + c[9] = (c2>>8) &0xFF; + c[10] = (c2>>16)&0xFF; + c[11] = c2>>24; + c[12] = c3 &0xFF; + c[13] = (c3>>8) &0xFF; + c[14] = (c3>>16)&0xFF; + c[15] = c3>>24; + +#endif + +} + +void rho_ad_eqov16 ( + const unsigned char* m, + unsigned char* s) { + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) ^= *(uint64_t*)(&m[0]); + *(uint64_t*)(&s[8]) ^= *(uint64_t*)(&m[8]); + +#else + + *(uint32_t*)(&s[0]) ^= *(uint32_t*)(&m[0]); + *(uint32_t*)(&s[4]) ^= *(uint32_t*)(&m[4]); + *(uint32_t*)(&s[8]) ^= *(uint32_t*)(&m[8]); + *(uint32_t*)(&s[12]) ^= *(uint32_t*)(&m[12]); + +#endif + +} + +void rho_ad_ud16 ( + const unsigned char* m, + unsigned char* s, + int len8) { + + unsigned char mp [16]; + pad(m,mp,len8); + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) ^= *(uint64_t*)(&mp[0]); + *(uint64_t*)(&s[8]) ^= *(uint64_t*)(&mp[8]); + +#else + + *(uint32_t*)(&s[0]) ^= *(uint32_t*)(&mp[0]); + *(uint32_t*)(&s[4]) ^= *(uint32_t*)(&mp[4]); + *(uint32_t*)(&s[8]) ^= *(uint32_t*)(&mp[8]); + *(uint32_t*)(&s[12]) ^= *(uint32_t*)(&mp[12]); + +#endif + +} + +void rho_eqov16 ( + const unsigned char* m, + unsigned char* c, + unsigned char* s) { + + g8A(s,c); + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t c0 = *(uint64_t*)(&c[0]); + uint64_t c1 = *(uint64_t*)(&c[8]); + + uint64_t s0 = *(uint64_t*)(&s[0]); + uint64_t s1 = *(uint64_t*)(&s[8]); + + uint64_t m0 = *(uint64_t*)(&m[0]); + uint64_t m1 = *(uint64_t*)(&m[8]); + + s0 ^= m0; + s1 ^= m1; + + c0 ^= m0; + c1 ^= m1; + + *(uint64_t*)(&s[0]) = s0; + *(uint64_t*)(&s[8]) = s1; + + *(uint64_t*)(&c[0]) = c0; + *(uint64_t*)(&c[8]) = c1; + +#else + + uint32_t c0 = *(uint32_t*)(&c[0]); + uint32_t c1 = *(uint32_t*)(&c[4]); + uint32_t c2 = *(uint32_t*)(&c[8]); + uint32_t c3 = *(uint32_t*)(&c[12]); + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t m0 = *(uint32_t*)(&m[0]); + uint32_t m1 = *(uint32_t*)(&m[4]); + uint32_t m2 = *(uint32_t*)(&m[8]); + uint32_t m3 = *(uint32_t*)(&m[12]); + + s0 ^= m0; + s1 ^= m1; + s2 ^= m2; + s3 ^= m3; + + c0 ^= m0; + c1 ^= m1; + c2 ^= m2; + c3 ^= m3; + + *(uint32_t*)(&s[0]) = s0; + *(uint32_t*)(&s[4]) = s1; + *(uint32_t*)(&s[8]) = s2; + *(uint32_t*)(&s[12]) = s3; + + *(uint32_t*)(&c[0]) = c0; + *(uint32_t*)(&c[4]) = c1; + *(uint32_t*)(&c[8]) = c2; + *(uint32_t*)(&c[12]) = c3; + +#endif + +} + +void rho_ud16 ( + const unsigned char* m, + unsigned char* c, + unsigned char* s, + int len8) { + + unsigned char mp [16]; + + pad(m,mp,len8); + + g8A(s,c); + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t mp0 = *(uint64_t*)&mp[0]; + uint64_t mp1 = *(uint64_t*)&mp[8]; + uint64_t c0 = *(uint64_t*)&c[0]; + uint64_t c1 = *(uint64_t*)&c[8]; + + *(uint64_t*)(&s[0]) ^= mp0; + *(uint64_t*)(&s[8]) ^= mp1; + + if (0 == len8) { + c0 = 0; + c1 = 0; + } else if (8 > len8) { + c0 = c0 ^ (mp0 & 0xffffffffffffffff >> (64 - (len8*8))); + c0 = c0 ^ (c0 & 0xffffffffffffffff << ( (len8*8))); + c1 = 0; + } else if (8 == len8) { + c0 = c0 ^ mp0; + c1 = 0; + } else if (16 > len8) { + len8 -= 8; + c0 = c0 ^ mp0; + c1 = c1 ^ (mp1 & 0xffffffffffffffff >> (64 - (len8*8))); + c1 = c1 ^ (c1 & 0xffffffffffffffff << ( (len8*8))); + } else { + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + } + + *(uint64_t*)&c[0] = c0; + *(uint64_t*)&c[8] = c1; + +#else + + uint32_t mp0 = *(uint32_t*)&mp[0]; + uint32_t mp1 = 
*(uint32_t*)&mp[4]; + uint32_t mp2 = *(uint32_t*)&mp[8]; + uint32_t mp3 = *(uint32_t*)&mp[12]; + uint32_t c0 = *(uint32_t*)&c[0]; + uint32_t c1 = *(uint32_t*)&c[4]; + uint32_t c2 = *(uint32_t*)&c[8]; + uint32_t c3 = *(uint32_t*)&c[12]; + + *(uint32_t*)(&s[0]) ^= mp0; + *(uint32_t*)(&s[4]) ^= mp1; + *(uint32_t*)(&s[8]) ^= mp2; + *(uint32_t*)(&s[12]) ^= mp3; + + if (0 == len8) { + c0 = 0; + c1 = 0; + c2 = 0; + c3 = 0; + } else if (4 > len8) { + c0 = c0 ^ (mp0 & 0xffffffff >> (32 - (len8*8))); + c0 = c0 ^ (c0 & 0xffffffff << ( (len8*8))); + c1 = 0; + c2 = 0; + c3 = 0; + } else if (4 == len8) { + c0 = c0 ^ mp0; + c1 = 0; + c2 = 0; + c3 = 0; + } else if (8 > len8) { + len8 -= 4; + c0 = c0 ^ mp0; + c1 = c1 ^ (mp1 & 0xffffffff >> (32 - (len8*8))); + c1 = c1 ^ (c1 & 0xffffffff << ( (len8*8))); + c2 = 0; + c3 = 0; + } else if (8 == len8) { + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = 0; + c3 = 0; + } else if (12 > len8) { + len8 -= 8; + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = c2 ^ (mp2 & 0xffffffff >> (32 - (len8*8))); + c2 = c2 ^ (c2 & 0xffffffff << ( (len8*8))); + c3 = 0; + } else if (12 == len8) { + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = c2 ^ mp2; + c3 = 0; + } else if (16 > len8) { + len8 -= 12; + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = c2 ^ mp2; + c3 = c3 ^ (mp3 & 0xffffffff >> (32 - (len8*8))); + c3 = c3 ^ (c3 & 0xffffffff << ( (len8*8))); + } else { + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = c2 ^ mp2; + c3 = c3 ^ mp3; + } + + *(uint32_t*)&c[0] = c0; + *(uint32_t*)&c[4] = c1; + *(uint32_t*)&c[8] = c2; + *(uint32_t*)&c[12] = c3; + +#endif + +} + +void irho_eqov16 ( + unsigned char* m, + const unsigned char* c, + unsigned char* s) { + + g8A(s,m); + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t c0 = *(uint64_t*)(&c[0]); + uint64_t c1 = *(uint64_t*)(&c[8]); + + uint64_t s0 = *(uint64_t*)(&s[0]); + uint64_t s1 = *(uint64_t*)(&s[8]); + + uint64_t m0 = *(uint64_t*)(&m[0]); + uint64_t m1 = *(uint64_t*)(&m[8]); + + s0 ^= c0 ^ m0; + s1 ^= c1 ^ m1; + + m0 ^= c0; + m1 ^= c1; + + *(uint64_t*)(&s[0]) = s0; + *(uint64_t*)(&s[8]) = s1; + + *(uint64_t*)(&m[0]) = m0; + *(uint64_t*)(&m[8]) = m1; + +#else + + uint32_t c0 = *(uint32_t*)(&c[0]); + uint32_t c1 = *(uint32_t*)(&c[4]); + uint32_t c2 = *(uint32_t*)(&c[8]); + uint32_t c3 = *(uint32_t*)(&c[12]); + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t m0 = *(uint32_t*)(&m[0]); + uint32_t m1 = *(uint32_t*)(&m[4]); + uint32_t m2 = *(uint32_t*)(&m[8]); + uint32_t m3 = *(uint32_t*)(&m[12]); + + s0 ^= c0 ^ m0; + s1 ^= c1 ^ m1; + s2 ^= c2 ^ m2; + s3 ^= c3 ^ m3; + + m0 ^= c0; + m1 ^= c1; + m2 ^= c2; + m3 ^= c3; + + *(uint32_t*)(&s[0]) = s0; + *(uint32_t*)(&s[4]) = s1; + *(uint32_t*)(&s[8]) = s2; + *(uint32_t*)(&s[12]) = s3; + + *(uint32_t*)(&m[0]) = m0; + *(uint32_t*)(&m[4]) = m1; + *(uint32_t*)(&m[8]) = m2; + *(uint32_t*)(&m[12]) = m3; + +#endif + +} + +void irho_ud16 ( + unsigned char* m, + const unsigned char* c, + unsigned char* s, + int len8) { + + unsigned char cp [16]; + + pad(c,cp,len8); + + g8A(s,m); + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t cp0 = *(uint64_t*)&cp[0]; + uint64_t cp1 = *(uint64_t*)&cp[8]; + uint64_t m0 = *(uint64_t*)&m[0]; + uint64_t m1 = *(uint64_t*)&m[8]; + uint64_t s0 = *(uint64_t*)&s[0]; + uint64_t s1 = *(uint64_t*)&s[8]; + + s0 ^= cp0; + s1 ^= cp1; + + if (0 == len8) { + m0 = 0; + m1 = 0; + } else if (8 > len8) { + s0 = s0 ^ (m0 & 0xffffffffffffffff >> (64 - (len8*8))); + + m0 = m0 ^ (cp0 & 0xffffffffffffffff >> (64 - (len8*8))); + m0 = 
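
rho_eqov16 and irho_eqov16 above are the full-block rho and inverse-rho maps of the mode: on encryption the state absorbs the message block and the ciphertext is G(S) ^ M; on decryption the message is recovered as G(S) ^ C and absorbed, so both directions leave the same updated state. The *_ud16 variants do the same on a padded final block and then mask the output down to the real length. A byte-wise sketch of the full-block case; rho_ref, irho_ref and g_byte are our names, shown for illustration only:

/* Illustrative full-block rho / inverse rho:
 *   rho : C = G(S) ^ M,  S' = S ^ M
 *   irho: M = G(S) ^ C,  S' = S ^ M   (same S' as the forward direction) */
static unsigned char g_byte (unsigned char x) {
  return (unsigned char)((x >> 1) ^ ((x ^ (x << 7)) & 0x80));
}

static void rho_ref (const unsigned char* m, unsigned char* c, unsigned char* s) {
  for (int i = 0; i < 16; i++) { c[i] = (unsigned char)(g_byte(s[i]) ^ m[i]); s[i] ^= m[i]; }
}

static void irho_ref (unsigned char* m, const unsigned char* c, unsigned char* s) {
  for (int i = 0; i < 16; i++) { m[i] = (unsigned char)(g_byte(s[i]) ^ c[i]); s[i] ^= m[i]; }
}
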
m0 ^ (m0 & 0xffffffffffffffff << ( (len8*8))); + m1 = 0; + } else if (8 == len8) { + s0 = s0 ^ m0; + + m0 = m0 ^ cp0; + m1 = 0; + } else if (16 > len8) { + len8 -= 8; + s0 = s0 ^ m0; + s1 = s1 ^ (m1 & 0xffffffffffffffff >> (64 - (len8*8))); + + m0 = m0 ^ cp0; + m1 = m1 ^ (cp1 & 0xffffffffffffffff >> (64 - (len8*8))); + m1 = m1 ^ (m1 & 0xffffffffffffffff << ( (len8*8))); + } else { + s0 = s0 ^ m0; + s1 = s1 ^ m1; + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + } + + *(uint64_t*)&s[0] = s0; + *(uint64_t*)&s[8] = s1; + *(uint64_t*)&m[0] = m0; + *(uint64_t*)&m[8] = m1; + +#else + + uint32_t cp0 = *(uint32_t*)&cp[0]; + uint32_t cp1 = *(uint32_t*)&cp[4]; + uint32_t cp2 = *(uint32_t*)&cp[8]; + uint32_t cp3 = *(uint32_t*)&cp[12]; + uint32_t m0 = *(uint32_t*)&m[0]; + uint32_t m1 = *(uint32_t*)&m[4]; + uint32_t m2 = *(uint32_t*)&m[8]; + uint32_t m3 = *(uint32_t*)&m[12]; + uint32_t s0 = *(uint32_t*)&s[0]; + uint32_t s1 = *(uint32_t*)&s[4]; + uint32_t s2 = *(uint32_t*)&s[8]; + uint32_t s3 = *(uint32_t*)&s[12]; + + s0 ^= cp0; + s1 ^= cp1; + s2 ^= cp2; + s3 ^= cp3; + + if (0 == len8) { + m0 = 0; + m1 = 0; + m2 = 0; + m3 = 0; + } else if (4 > len8) { + s0 = s0 ^ (m0 & 0xffffffff >> (32 - (len8*8))); + + m0 = m0 ^ (cp0 & 0xffffffff >> (32 - (len8*8))); + m0 = m0 ^ (m0 & 0xffffffff << ( (len8*8))); + m1 = 0; + m2 = 0; + m3 = 0; + } else if (4 == len8) { + s0 = s0 ^ m0; + + m0 = m0 ^ cp0; + m1 = 0; + m2 = 0; + m3 = 0; + } else if (8 > len8) { + len8 -= 4; + s0 = s0 ^ m0; + s1 = s1 ^ (m1 & 0xffffffff >> (32 - (len8*8))); + + m0 = m0 ^ cp0; + m1 = m1 ^ (cp1 & 0xffffffff >> (32 - (len8*8))); + m1 = m1 ^ (m1 & 0xffffffff << ( (len8*8))); + m2 = 0; + m3 = 0; + } else if (8 == len8) { + s0 = s0 ^ m0; + s1 = s1 ^ m1; + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + m2 = 0; + m3 = 0; + } else if (12 > len8) { + len8 -= 8; + s0 = s0 ^ m0; + s1 = s1 ^ m1; + s2 = s2 ^ (m2 & 0xffffffff >> (32 - (len8*8))); + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + m2 = m2 ^ (cp2 & 0xffffffff >> (32 - (len8*8))); + m2 = m2 ^ (m2 & 0xffffffff << ( (len8*8))); + m3 = 0; + } else if (12 == len8) { + s0 = s0 ^ m0; + s1 = s1 ^ m1; + s2 = s2 ^ m2; + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + m2 = m2 ^ cp2; + m3 = 0; + } else if (16 > len8) { + len8 -= 12; + s0 = s0 ^ m0; + s1 = s1 ^ m1; + s2 = s2 ^ m2; + s3 = s3 ^ (m3 & 0xffffffff >> (32 - (len8*8))); + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + m2 = m2 ^ cp2; + m3 = m3 ^ (cp3 & 0xffffffff >> (32 - (len8*8))); + m3 = m3 ^ (m3 & 0xffffffff << ( (len8*8))); + } else { + s0 = s0 ^ m0; + s1 = s1 ^ m1; + s2 = s2 ^ m2; + s3 = s3 ^ m3; + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + m2 = m2 ^ cp2; + m3 = m3 ^ cp3; + } + + *(uint32_t*)&s[0] = s0; + *(uint32_t*)&s[4] = s1; + *(uint32_t*)&s[8] = s2; + *(uint32_t*)&s[12] = s3; + *(uint32_t*)&m[0] = m0; + *(uint32_t*)&m[4] = m1; + *(uint32_t*)&m[8] = m2; + *(uint32_t*)&m[12] = m3; + +#endif + +} + +void reset_lfsr_gf56 (unsigned char* CNT) { + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&CNT[0]) = 0x0000000000000001; // CNT7 CNT6 CNT5 CNT4 CNT3 CNT2 CNT1 CNT0 + +#else + + *(uint32_t*)(&CNT[0]) = 0x00000001; // CNT3 CNT2 CNT1 CNT0 + *(uint32_t*)(&CNT[4]) = 0x00000000; // CNT7 CNT6 CNT5 CNT4 + +#endif + +} + +void lfsr_gf56 (unsigned char* CNT) { + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t C0; + uint64_t fb0; + + C0 = *(uint64_t*)(&CNT[0]); // CNT7 CNT6 CNT5 CNT4 CNT3 CNT2 CNT1 CNT0 + + fb0 = 0; + if (CNT[6] & 0x80) { + fb0 = 0x95; + } + + C0 = C0 << 1 ^ fb0; + + *(uint64_t*)(&CNT[0]) = C0; + +#else + + uint32_t C0; + uint32_t C1; + uint32_t fb0; + + C0 = *(uint32_t*)(&CNT[0]); // CNT3 CNT2 CNT1 CNT0 + 
C1 = *(uint32_t*)(&CNT[4]); // CNT7 CNT6 CNT5 CNT4 + + fb0 = 0; + if (CNT[6] & 0x80) { + fb0 = 0x95; + } + + C1 = C1 << 1 | C0 >> 31; + C0 = C0 << 1 ^ fb0; + + *(uint32_t*)(&CNT[0]) = C0; + *(uint32_t*)(&CNT[4]) = C1; + +#endif + +} + +void block_cipher( + unsigned char* s, + const unsigned char* k, unsigned char* T, + unsigned char* CNT, unsigned char D, + skinny_ctrl* p_skinny_ctrl) { + + CNT[7] = D; + p_skinny_ctrl->func_skinny_128_384_enc(s, p_skinny_ctrl, CNT, T, k); + +} + +void nonce_encryption ( + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + skinny_ctrl* p_skinny_ctrl) { + + block_cipher(s,k,(unsigned char*)N,CNT,D,p_skinny_ctrl); + +} + +void generate_tag ( + unsigned char** c, unsigned char* s, + unsigned long long* clen) { + + g8A_for_Tag_Generation(s, *c); + + *c = *c + 16; + *c = *c - *clen; + +} + +unsigned long long msg_encryption ( + const unsigned char** M, unsigned char** c, + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long mlen, + skinny_ctrl* l_skinny_ctrl) { + + int len8; + + if (mlen >= 16) { + len8 = 16; + mlen = mlen - 16; + rho_eqov16(*M, *c, s); + } + else { + len8 = mlen; + mlen = 0; + rho_ud16(*M, *c, s, len8); + } + *c = *c + len8; + *M = *M + len8; + lfsr_gf56(CNT); + if (mlen != 0) { + nonce_encryption(N,CNT,s,k,D,l_skinny_ctrl); + } + return mlen; + +} + +unsigned long long msg_decryption ( + unsigned char** M, const unsigned char** c, + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long clen, + skinny_ctrl* l_skinny_ctrl) { + + int len8; + + if (clen >= 16) { + len8 = 16; + clen = clen - 16; + irho_eqov16(*M, *c, s); + } + else { + len8 = clen; + clen = 0; + irho_ud16(*M, *c, s, len8); + } + *c = *c + len8; + *M = *M + len8; + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,D,l_skinny_ctrl); + return clen; + +} + +unsigned long long ad2msg_encryption ( + const unsigned char** M, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long mlen, + skinny_ctrl* l_skinny_ctrl) { + + unsigned char T [16]; + int len8; + + if (mlen <= 16) { + len8 = mlen; + mlen = 0; + } + else { + len8 = 16; + mlen = mlen - 16; + } + + pad (*M,T,len8); + block_cipher(s,k,T,CNT,D,l_skinny_ctrl); + lfsr_gf56(CNT); + *M = *M + len8; + + return mlen; + +} + +unsigned long long ad_encryption ( + const unsigned char** A, unsigned char* s, + const unsigned char* k, unsigned long long adlen, + unsigned char* CNT, + unsigned char D, + skinny_ctrl* l_skinny_ctrl) { + + unsigned char T [16]; + int len8; + + if (adlen >= 16) { + len8 = 16; + adlen = adlen - 16; + + rho_ad_eqov16(*A, s); + } + else { + len8 = adlen; + adlen = 0; + rho_ad_ud16(*A, s, len8); + } + *A = *A + len8; + lfsr_gf56(CNT); + if (adlen != 0) { + if (adlen >= 16) { + len8 = 16; + adlen = adlen - 16; + } + else { + len8 = adlen; + adlen = 0; + } + pad(*A, T, len8); + *A = *A + len8; + block_cipher(s,k,T,CNT,D,l_skinny_ctrl); + lfsr_gf56(CNT); + } + + return adlen; + +} + +int crypto_aead_encrypt ( + unsigned char* c, unsigned long long* clen, + const unsigned char* m, unsigned long long mlen, + const unsigned char* ad, unsigned long long adlen, + const unsigned char* nsec, + const unsigned char* npub, + const unsigned char* k) { + + unsigned char s[16]; + unsigned char CNT[8]; + unsigned char T[16]; + const unsigned char* N; + unsigned char w; + unsigned long 
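
reset_lfsr_gf56 and lfsr_gf56 above maintain the 56-bit block counter held in CNT[0..6]; CNT[7] is free and is overwritten with the domain byte D by block_cipher right before every SKINNY call. One step shifts the 56-bit value left by one and XORs 0x95 into the low byte whenever the bit falling out (CNT[6] & 0x80) is set, which reads as multiplication by x modulo x^56 + x^7 + x^4 + x^2 + 1 if we have decoded the constant correctly. A byte-wise reference; the name lfsr_gf56_ref is ours, sketch only:

/* Illustrative byte-wise version of one counter step.  CNT[0] is the least
 * significant byte, CNT[6] the most significant; CNT[7] (the domain byte)
 * is left alone here.  The word-level code clobbers CNT[7], but it is
 * rewritten before every use anyway. */
static void lfsr_gf56_ref (unsigned char* CNT) {
  unsigned char fb = (CNT[6] & 0x80) ? 0x95 : 0x00;
  for (int i = 6; i > 0; i--)
    CNT[i] = (unsigned char)((CNT[i] << 1) | (CNT[i - 1] >> 7));
  CNT[0] = (unsigned char)((CNT[0] << 1) ^ fb);
}
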
long xlen; + + skinny_ctrl l_skinny_ctrl; + l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc123_12; + + (void)nsec; + N = npub; + + xlen = mlen; + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) = 0; + *(uint64_t*)(&s[8]) = 0; + +#else + + *(uint32_t*)(&s[0]) = 0; + *(uint32_t*)(&s[4]) = 0; + *(uint32_t*)(&s[8]) = 0; + *(uint32_t*)(&s[12]) = 0; + +#endif + + reset_lfsr_gf56(CNT); + + w = 48; + + if (adlen == 0) { + w = w ^ 2; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else if (adlen%(32) == 0) { + w = w ^ 8; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else if (adlen%(32) < 16) { + w = w ^ 2; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else if (adlen%(32) == 16) { + w = w ^ 0; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else { + w = w ^ 10; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + + if (adlen == 0) { // AD is an empty string + lfsr_gf56(CNT); + } + else while (adlen > 0) { + adlen = ad_encryption(&ad,s,k,adlen,CNT,40,&l_skinny_ctrl); + } + + if ((w & 8) == 0) { + xlen = ad2msg_encryption (&m,CNT,s,k,44,xlen,&l_skinny_ctrl); + } + else if (mlen == 0) { + lfsr_gf56(CNT); + } + while (xlen > 0) { + xlen = ad_encryption(&m,s,k,xlen,CNT,44,&l_skinny_ctrl); + } + nonce_encryption(N,CNT,s,k,w,&l_skinny_ctrl); + + // Tag generation + g8A(s, T); + + m = m - mlen; + + l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc1_1; + + reset_lfsr_gf56(CNT); + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) = *(uint64_t*)(&T[0]); + *(uint64_t*)(&s[8]) = *(uint64_t*)(&T[8]); + +#else + + *(uint32_t*)(&s[0]) = *(uint32_t*)(&T[0]); + *(uint32_t*)(&s[4]) = *(uint32_t*)(&T[4]); + *(uint32_t*)(&s[8]) = *(uint32_t*)(&T[8]); + *(uint32_t*)(&s[12]) = *(uint32_t*)(&T[12]); + +#endif + + *clen = mlen + 16; + + if (mlen > 0) { + nonce_encryption(N,CNT,s,k,36,&l_skinny_ctrl); + while (mlen > 16) { + mlen = msg_encryption(&m,&c,N,CNT,s,k,36,mlen,&l_skinny_ctrl); + } + rho_ud16(m, c, s, mlen); + c = c + mlen; + m = m + mlen; + } + + // Tag Concatenation + c[0] = T[0]; + c[1] = T[1]; + c[2] = T[2]; + c[3] = T[3]; + c[4] = T[4]; + c[5] = T[5]; + c[6] = T[6]; + c[7] = T[7]; + c[8] = T[8]; + c[9] = T[9]; + c[10] = T[10]; + c[11] = T[11]; + c[12] = T[12]; + c[13] = T[13]; + c[14] = T[14]; + c[15] = T[15]; + + c = c - *clen; + + return 0; + +} + +int crypto_aead_decrypt( + unsigned char *m,unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c,unsigned long long clen, + const unsigned char *ad,unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k) { + + unsigned char s[16]; + unsigned char CNT[8]; + unsigned char T[16]; + const unsigned char* N; + unsigned char w; + unsigned long long xlen; + const unsigned char* mauth; + unsigned char* p1; + unsigned char* p2; 
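
The long if/else ladder that builds w in crypto_aead_encrypt above (and again in crypto_aead_decrypt below) assembles the domain-separation byte for the final tag-generating call: starting from 48, bits are flipped depending on whether the AD and the message are empty, fill their last 32-byte double-block exactly, stop inside the first 16-byte half, stop exactly on that half, or stop inside the second half. The inner five-way branch on xlen % 32 is identical in all five outer branches, so the ladder is equivalent to the factored form below; domain_w and w_adjust are our names, introduced only to make the case analysis easier to read:

/* Equivalent, factored computation of the domain byte w (illustrative only). */
static unsigned char w_adjust (unsigned long long len,
                               unsigned char if_empty, unsigned char if_full32,
                               unsigned char if_lt16, unsigned char if_gt16) {
  if (len == 0)       return if_empty;
  if (len % 32 == 0)  return if_full32;
  if (len % 32 < 16)  return if_lt16;
  if (len % 32 == 16) return 0;
  return if_gt16;
}

static unsigned char domain_w (unsigned long long adlen, unsigned long long xlen) {
  unsigned char w = 48;
  w ^= w_adjust(adlen, 2, 8, 2, 10);  /* associated-data contribution */
  w ^= w_adjust(xlen,  1, 4, 1, 5);   /* message contribution         */
  return w;
}
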
+ + skinny_ctrl l_skinny_ctrl; + l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc123_12; + + (void)nsec; + mauth = m; + + N = npub; + + xlen = clen-16; + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) = 0; + *(uint64_t*)(&s[8]) = 0; + +#else + + *(uint32_t*)(&s[0]) = 0; + *(uint32_t*)(&s[4]) = 0; + *(uint32_t*)(&s[8]) = 0; + *(uint32_t*)(&s[12]) = 0; + +#endif + + reset_lfsr_gf56(CNT); + + w = 48; + + if (adlen == 0) { + w = w ^ 2; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else if (adlen%(32) == 0) { + w = w ^ 8; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else if (adlen%(32) < 16) { + w = w ^ 2; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else if (adlen%(32) == 16) { + w = w ^ 0; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + else { + w = w ^ 10; + if (xlen == 0) { + w =w ^ 1; + } + else if (xlen%(32) == 0) { + w = w ^ 4; + } + else if (xlen%(32) < 16) { + w = w ^ 1; + } + else if (xlen%(32) == 16) { + w = w ^ 0; + } + else { + w = w ^ 5; + } + } + + if (adlen == 0) { // AD is an empty string + lfsr_gf56(CNT); + } + else while (adlen > 0) { + adlen = ad_encryption(&ad,s,k,adlen,CNT,40,&l_skinny_ctrl); + } + + if ((w & 8) == 0) { + xlen = ad2msg_encryption (&mauth,CNT,s,k,44,xlen,&l_skinny_ctrl); + } + else if (clen == 0) { + lfsr_gf56(CNT); + } + while (xlen > 0) { + xlen = ad_encryption(&mauth,s,k,xlen,CNT,44,&l_skinny_ctrl); + } + nonce_encryption(N,CNT,s,k,w,&l_skinny_ctrl); + + // Tag generation + g8A(s, T); + + l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc1_1; + + reset_lfsr_gf56(CNT); + + p1 = T; + p2 = (unsigned char*)&c[clen - 16]; + + p1[0] = p2[0]; + p1[1] = p2[1]; + p1[2] = p2[2]; + p1[3] = p2[3]; + p1[4] = p2[4]; + p1[5] = p2[5]; + p1[6] = p2[6]; + p1[7] = p2[7]; + p1[8] = p2[8]; + p1[9] = p2[9]; + p1[10] = p2[10]; + p1[11] = p2[11]; + p1[12] = p2[12]; + p1[13] = p2[13]; + p1[14] = p2[14]; + p1[15] = p2[15]; + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) = *(uint64_t*)(&T[0]); + *(uint64_t*)(&s[8]) = *(uint64_t*)(&T[8]); + +#else + + *(uint32_t*)(&s[0]) = *(uint32_t*)(&T[0]); + *(uint32_t*)(&s[4]) = *(uint32_t*)(&T[4]); + *(uint32_t*)(&s[8]) = *(uint32_t*)(&T[8]); + *(uint32_t*)(&s[12]) = *(uint32_t*)(&T[12]); + +#endif + + clen = clen - 16; + *mlen = clen; + + if (clen > 0) { + nonce_encryption(N,CNT,s,k,36,&l_skinny_ctrl); + + l_skinny_ctrl.func_skinny_128_384_enc = skinny_128_384_enc1_1; + + while (clen > 16) { + clen = msg_decryption(&m,&c,N,CNT,s,k,36,clen,&l_skinny_ctrl); + } + irho_ud16(m, c, s, clen); + c = c + clen; + m = m + clen; + } + + for (int i = 0; i < 16; i++) { + if (T[i] != (*(c+i))) { + return -1; + } + } + + return 0; + +} diff --git a/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/skinny.h b/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/skinny.h new file mode 100644 index 0000000..826f2f8 --- /dev/null +++ 
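
A remark on the tag check that ends crypto_aead_decrypt above: the byte loop returns on the first mismatch, so its running time depends on how many tag bytes agree. If constant-time verification matters on the target, an accumulate-then-test comparison such as the sketch below can be substituted; verify_tag16 is our name and this is not part of the submitted code:

/* Constant-time 16-byte tag comparison (illustrative sketch): OR together
 * all byte differences and test once, so the loop always runs 16 times. */
static int verify_tag16 (const unsigned char* T, const unsigned char* tag) {
  unsigned char diff = 0;
  for (int i = 0; i < 16; i++)
    diff |= (unsigned char)(T[i] ^ tag[i]);
  return diff ? -1 : 0;
}
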
b/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/skinny.h @@ -0,0 +1,106 @@ +#define ___SKINNY_LOOP +#define ___NUM_OF_ROUNDS_56 +#if (defined(__riscv_xlen) && (__riscv_xlen == 64)) +#define ___ENABLE_DWORD_CAST +#endif + +#include + +typedef struct ___skinny_ctrl { +#ifdef ___NUM_OF_ROUNDS_56 + unsigned char roundKeys[960]; // number of rounds : 56 +#else + unsigned char roundKeys[704]; // number of rounds : 40 +#endif + void (*func_skinny_128_384_enc)(unsigned char*, struct ___skinny_ctrl*, unsigned char* CNT, unsigned char* T, const unsigned char* K); +} skinny_ctrl; + +extern void skinny_128_384_enc123_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K); +extern void skinny_128_384_enc12_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K); +extern void skinny_128_384_enc1_1 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K); + +#define pack_word(x0, x1, x2, x3, w) \ + w = ((x3) << 24) ^ \ + ((x2) << 16) ^ \ + ((x1) << 8) ^ \ + (x0); + +#define unpack_word(x0, x1, x2, x3, w) \ + x0 = ((w) & 0xff); \ + x1 = (((w) >> 8) & 0xff); \ + x2 = (((w) >> 16) & 0xff); \ + x3 = ((w) >> 24); + +#ifdef ___ENABLE_DWORD_CAST + +#define PERMUTATION() \ +/* permutation */ \ + \ + /* 7 6 5 4 3 2 1 0 */ \ + /* 5 7 2 3 6 0 4 1 */ \ + \ + /* dw (7 6 5 4 3 2 1 0) */ \ + \ + /* dw (5 7 2 3 6 0 4 1) */ \ + \ + dt0 = dw >> 24; /* - - - 7 6 5 4 3 */ \ + dt0 = dt0 & 0x00000000ff00ff00; /* - - - - 6 - 4 - */ \ + \ + dt1 = dw << 16; /* 5 4 3 2 1 0 - - */ \ + dt1 = dt1 & 0xff00000000ff0000; /* 5 - - - - 0 - - */ \ + dt0 = dt0 ^ dt1; /* 5 - - - 6 0 4 - */ \ + \ + dt1 = dw >> 8; /* - 7 6 5 4 3 2 1 */ \ + dt1 = dt1 & 0x00ff0000000000ff; /* - 7 - - - - - 1 */ \ + dt0 = dt0 ^ dt1; /* 5 7 - - 6 0 4 1 */ \ + \ + dt1 = dw << 8; /* 6 5 4 3 2 1 0 - */ \ + dt1 = dt1 & 0x000000ff00000000; /* - - - 3 - - - - */ \ + dt0 = dt0 ^ dt1; /* 5 7 - 3 6 0 4 1 */ \ + \ + dt1 = dw << 24; /* 4 3 2 1 0 - - - */ \ + dw = dt1 & 0x0000ff0000000000; /* - - 2 - - - - - */ \ + dw = dw ^ dt0; /* 5 7 2 3 6 0 4 1 */ + +#else + +#define PERMUTATION() \ +/* permutation */ \ + \ + /* 7 6 5 4 3 2 1 0 */ \ + /* 5 7 2 3 6 0 4 1 */ \ + \ + /* w0 (3 2 1 0) */ \ + /* w1 (7 6 5 4) */ \ + \ + /* w0 (6 0 4 1) */ \ + /* w1 (5 7 2 3) */ \ + \ + t0 = w1 << 8; /* 6 5 4 - */ \ + t0 = t0 & 0xff00ff00; /* 6 - 4 - */ \ + \ + t1 = w1 << 16; /* 5 4 - - */ \ + t1 = t1 & 0xff000000; /* 5 - - - */ \ + \ + t2 = w1 & 0xff000000; /* 7 - - - */ \ + t2 = t2 >> 8; /* - 7 - - */ \ + t1 = t1 ^ t2; /* 5 7 - - */ \ + \ + t2 = w0 & 0xff000000; /* 3 - - - */ \ + t2 = t2 >> 24; /* - - - 3 */ \ + t1 = t1 ^ t2; /* 5 7 - 3 */ \ + \ + w1 = w0 >> 8; /* - 3 2 1 */ \ + w1 = w1 & 0x0000ff00; /* - - 2 - */ \ + w1 = w1 ^ t1; /* 5 7 2 3 */ \ + \ + t2 = w0 & 0x0000ff00; /* - - 1 - */ \ + t2 = t2 >> 8; /* - - - 1 */ \ + t0 = t0 ^ t2; /* 6 - 4 1 */ \ + \ + w0 = w0 << 16; /* 1 0 - - */ \ + w0 = w0 & 0x00ff0000; /* - 0 - - */ \ + w0 = w0 ^ t0; /* 6 0 4 1 */ + +#endif + diff --git a/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/skinny_key_schedule2.c b/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/skinny_key_schedule2.c new file mode 100644 index 0000000..c2f30de --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/skinny_key_schedule2.c @@ -0,0 +1,431 @@ +/****************************************************************************** + * Copyright (c) 2020, NEC 
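
The PERMUTATION() macro in skinny.h above is the word-level form of a fixed permutation of the 8 tweakey bytes handled per schedule step; the comment pair '7 6 5 4 3 2 1 0' -> '5 7 2 3 6 0 4 1' lists, for byte positions 7 down to 0, which input byte lands there. Written as an index table (output byte i takes input byte P8[i]) the same mapping is the one below; we derived it from the shift/mask pairs of the macro, and it also matches the explicit byte copies of PERMUTATION_TK1 later in skinny_main.c. Names are ours, sketch only:

#include <stdint.h>

/* Index form of the byte permutation implemented by PERMUTATION(). */
static const unsigned char P8[8] = { 1, 4, 0, 6, 3, 2, 7, 5 };

static uint64_t permute_bytes_ref (uint64_t in) {
  uint64_t out = 0;
  for (int i = 0; i < 8; i++)
    out |= ((in >> (8 * P8[i])) & 0xff) << (8 * i);
  return out;
}
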
Corporation. + * + * THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND. + * + *****************************************************************************/ + +/* + * SKINNY-128-384 + * + * load * AC(c0 c1) ^ TK3 + * calc AC(c0 c1) ^ TK2 -> store + * ART(TK2) + * + * number of rounds : 40 or 56 + */ + +#include "skinny.h" + +#ifdef ___ENABLE_DWORD_CAST + +#define PERMUTATION_TK2() \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK2) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x6 x5 x4 x3 x2 x1 x0 x7^x5) */ \ + dw = ((dw << 1) & 0xfefefefefefefefe) ^ \ + (((dw >> 7) ^ (dw >> 5)) & 0x0101010101010101); \ + \ + /* Load TK3 */ \ + /* TK2^TK3^AC(c0 c1) */ \ + /* store */ \ + *tk2 = dw ^ *tk3; \ + tk2 += 2; \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys) +{ + uint64_t* tk2; // used in MACRO + uint64_t* tk3; // used in MACRO + uint64_t dt0; // used in MACRO + uint64_t dt1; // used in MACRO + uint64_t dw; + + // odd + + // load master key + // load master key + dw = *(uint64_t*)&roundKeys[16]; + + tk2 = (uint64_t*)&roundKeys[64]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[384]; +#else + tk3 = (uint64_t*)&roundKeys[512]; +#endif + + // 1st round + *tk2 = dw ^ *tk3; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... ,37th,39th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41th,43th, ... ,51th,53th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + + // even + + // load master key + dw = *(uint64_t*)&roundKeys[24]; + + tk2 = (uint64_t*)&roundKeys[72]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[392]; +#else + tk3 = (uint64_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... ,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... ,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys) +{ + uint64_t* tk2; // used in MACRO + uint64_t* tk3; // used in MACRO + uint64_t dt0; // used in MACRO + uint64_t dt1; // used in MACRO + uint64_t dw; + + // odd + + // load master key + dw = *(uint64_t*)&roundKeys[16]; + + tk2 = (uint64_t*)&roundKeys[64]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[384]; +#else + tk3 = (uint64_t*)&roundKeys[512]; +#endif + + // 1st round + *tk2 = dw ^ *tk3; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... 
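
The masked shifts inside PERMUTATION_TK2 are the SKINNY TK2 cell LFSR applied to every byte of a word at once: per byte, (x7 x6 x5 x4 x3 x2 x1 x0) becomes (x6 x5 x4 x3 x2 x1 x0 x7^x5), a left shift with x7 ^ x5 fed into the new low bit, exactly as the macro comment says. A scalar per-byte reference; lfsr_tk2_byte is our name, sketch only:

/* Illustrative per-byte TK2 LFSR, equivalent to one lane of the
 * (w << 1) & 0xfe... ^ ((w >> 7 ^ w >> 5) & 0x01...) expression. */
static unsigned char lfsr_tk2_byte (unsigned char x) {
  return (unsigned char)(((x << 1) & 0xfe) ^ (((x >> 7) ^ (x >> 5)) & 0x01));
}
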
+#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + PERMUTATION_TK2(); + } + + // even + + // load master key + dw = *(uint64_t*)&roundKeys[24]; + + tk2 = (uint64_t*)&roundKeys[72]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[392]; +#else + tk3 = (uint64_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + PERMUTATION_TK2(); + } + +} + +#endif /* ___SKINNY_LOOP */ + +#else /* ___ENABLE_DWORD_CAST */ + +#define PERMUTATION_TK2() \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK2) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x6 x5 x4 x3 x2 x1 x0 x7^x5) */ \ + w0 = ((w0 << 1) & 0xfefefefe) ^ \ + (((w0 >> 7) ^ (w0 >> 5)) & 0x01010101); \ + w1 = ((w1 << 1) & 0xfefefefe) ^ \ + (((w1 >> 7) ^ (w1 >> 5)) & 0x01010101); \ + \ + /* Load TK3 */ \ + /* TK2^TK3^AC(c0 c1) */ \ + /* store */ \ + *tk2++ = w0 ^ *tk3++; \ + *tk2++ = w1 ^ *tk3++; \ + tk2 += 2; \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys) +{ + uint32_t* tk2; // used in MACRO + uint32_t* tk3; // used in MACRO + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + + // odd + + // load master key + w0 = *(uint32_t*)&roundKeys[16]; + w1 = *(uint32_t*)&roundKeys[20]; + + tk2 = (uint32_t*)&roundKeys[64]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[384]; +#else + tk3 = (uint32_t*)&roundKeys[512]; +#endif + + // 1st round + *tk2++ = w0 ^ *tk3++; + *tk2++ = w1 ^ *tk3++; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... ,37th,39th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41th,43th, ... ,51th,53th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + + // even + + // load master key + w0 = *(uint32_t*)&roundKeys[24]; + w1 = *(uint32_t*)&roundKeys[28]; + + tk2 = (uint32_t*)&roundKeys[72]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[392]; +#else + tk3 = (uint32_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... ,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... 
,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys) +{ + uint32_t* tk2; // used in MACRO + uint32_t* tk3; // used in MACRO + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + + // odd + + // load master key + w0 = *(uint32_t*)&roundKeys[16]; + w1 = *(uint32_t*)&roundKeys[20]; + + tk2 = (uint32_t*)&roundKeys[64]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[384]; +#else + tk3 = (uint32_t*)&roundKeys[512]; +#endif + + // 1st round + *tk2++ = w0 ^ *tk3++; + *tk2++ = w1 ^ *tk3++; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + PERMUTATION_TK2(); + } + + // even + + // load master key + w0 = *(uint32_t*)&roundKeys[24]; + w1 = *(uint32_t*)&roundKeys[28]; + + tk2 = (uint32_t*)&roundKeys[72]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[392]; +#else + tk3 = (uint32_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + PERMUTATION_TK2(); + } + +} + +#endif /* ___SKINNY_LOOP */ + +#endif /* ___ENABLE_DWORD_CAST */ + diff --git a/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/skinny_key_schedule3.c b/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/skinny_key_schedule3.c new file mode 100644 index 0000000..5dcaf7f --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/skinny_key_schedule3.c @@ -0,0 +1,428 @@ +/****************************************************************************** + * Copyright (c) 2020, NEC Corporation. + * + * THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND. + * + *****************************************************************************/ + +/* + * SKINNY-128-384 + * + * AC(c0 c1) ^ TK3 -> store + * ART(TK3) + * + * number of rounds : 40 or 56 + */ + +#include "skinny.h" + +#ifdef ___ENABLE_DWORD_CAST + +#define PERMUTATION_TK3(c0Val, c1Val) \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK3) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x0^x6 x7 x6 x5 x4 x3 x2 x1) */ \ + dw = ((dw >> 1) & 0x7f7f7f7f7f7f7f7f) ^ \ + (((dw << 7) ^ (dw << 1)) & 0x8080808080808080); \ + \ + /* K3^AC(c0 c1) */ \ + /* store */ \ + dt0 = dw ^ c0Val; \ + *tk3 = dt0 ^ ((uint64_t)c1Val << 40); \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys) +{ + uint64_t *tk3; + uint64_t dt0; // used in MACRO + uint64_t dt1; // used in MACRO + uint64_t dw; + + // odd + + // load master key + dw = *(uint64_t*)&roundKeys[32]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[384]; +#else + tk3 = (uint64_t*)&roundKeys[512]; +#endif + + // 1st round + *tk3++ = dw ^ 0x01; + tk3 += 1; + + // 3rd,5th, ... 
,37th,39th round + PERMUTATION_TK3(0x7, 0x0); + PERMUTATION_TK3(0xf, 0x1); + PERMUTATION_TK3(0xd, 0x3); + PERMUTATION_TK3(0x7, 0x3); + PERMUTATION_TK3(0xe, 0x1); + PERMUTATION_TK3(0x9, 0x3); + PERMUTATION_TK3(0x7, 0x2); + PERMUTATION_TK3(0xd, 0x1); + PERMUTATION_TK3(0x5, 0x3); + + PERMUTATION_TK3(0x6, 0x1); + PERMUTATION_TK3(0x8, 0x1); + PERMUTATION_TK3(0x1, 0x2); + PERMUTATION_TK3(0x5, 0x0); + PERMUTATION_TK3(0x7, 0x1); + PERMUTATION_TK3(0xc, 0x1); + PERMUTATION_TK3(0x1, 0x3); + PERMUTATION_TK3(0x6, 0x0); + PERMUTATION_TK3(0xb, 0x1); + PERMUTATION_TK3(0xd, 0x2); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41td,43th, ... ,53th,55th round + PERMUTATION_TK3(0x4, 0x3); + PERMUTATION_TK3(0x2, 0x1); + PERMUTATION_TK3(0x8, 0x0); + PERMUTATION_TK3(0x2, 0x2); + PERMUTATION_TK3(0x9, 0x0); + PERMUTATION_TK3(0x6, 0x2); + PERMUTATION_TK3(0x9, 0x1); + PERMUTATION_TK3(0x5, 0x2); + +#endif + + // even + + // load master key + dw = *(uint64_t*)&roundKeys[40]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[392]; +#else + tk3 = (uint64_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... ,38th,40th round + PERMUTATION_TK3(0x3, 0x0); + PERMUTATION_TK3(0xf, 0x0); + PERMUTATION_TK3(0xe, 0x3); + PERMUTATION_TK3(0xb, 0x3); + PERMUTATION_TK3(0xf, 0x2); + PERMUTATION_TK3(0xc, 0x3); + PERMUTATION_TK3(0x3, 0x3); + PERMUTATION_TK3(0xe, 0x0); + PERMUTATION_TK3(0xa, 0x3); + PERMUTATION_TK3(0xb, 0x2); + + PERMUTATION_TK3(0xc, 0x2); + PERMUTATION_TK3(0x0, 0x3); + PERMUTATION_TK3(0x2, 0x0); + PERMUTATION_TK3(0xb, 0x0); + PERMUTATION_TK3(0xe, 0x2); + PERMUTATION_TK3(0x8, 0x3); + PERMUTATION_TK3(0x3, 0x2); + PERMUTATION_TK3(0xd, 0x0); + PERMUTATION_TK3(0x6, 0x3); + PERMUTATION_TK3(0xa, 0x1); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... ,54th,56th round + PERMUTATION_TK3(0x9, 0x2); + PERMUTATION_TK3(0x4, 0x2); + PERMUTATION_TK3(0x1, 0x1); + PERMUTATION_TK3(0x4, 0x0); + PERMUTATION_TK3(0x3, 0x1); + PERMUTATION_TK3(0xc, 0x0); + PERMUTATION_TK3(0x2, 0x3); + PERMUTATION_TK3(0xa, 0x0); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys, unsigned char *pRC) +{ + uint64_t *tk3; + uint64_t dt0; // used in MACRO + uint64_t dt1; // used in MACRO + uint64_t dw; + uint64_t c0; + uint64_t c1; + + // odd + + // load master key + dw = *(uint64_t*)&roundKeys[32]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[384]; +#else + tk3 = (uint64_t*)&roundKeys[512]; +#endif + + // 1st round + *tk3++ = dw ^ 0x01; + tk3 += 1; + + pRC += 4; + // 3rd,5th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + + // even + + // load master key + dw = *(uint64_t*)&roundKeys[40]; + +#ifndef ___NUM_OF_ROUNDS_56 + pRC -= 78; + tk3 = (uint64_t*)&roundKeys[392]; +#else + pRC -= 110; + tk3 = (uint64_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... 
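
The (c0Val, c1Val) pairs hard-coded into the PERMUTATION_TK3 calls above, and the RC[] table that the ___SKINNY_LOOP build reads in skinny_main.c further down, are the two nibbles of SKINNY's 6-bit round-constant LFSR, split per round into c0 (low four bits) and c1 (top two bits). They can be regenerated with the standard update rc <- (rc << 1) | (rc5 ^ rc4 ^ 1), which yields 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3e, ... and matches the listed pairs for the odd and even rounds. A small generator; skinny_rc_pairs is our name, sketch only:

/* Regenerate the per-round constants: c0[r] = rc & 0xf, c1[r] = rc >> 4. */
static void skinny_rc_pairs (int rounds, unsigned char c0[], unsigned char c1[]) {
  unsigned char rc = 0;
  for (int r = 0; r < rounds; r++) {
    rc = (unsigned char)(((rc << 1) | (((rc >> 5) ^ (rc >> 4) ^ 1) & 1)) & 0x3f);
    c0[r] = (unsigned char)(rc & 0x0f);
    c1[r] = (unsigned char)(rc >> 4);
  }
}
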
+#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + +} + +#endif /* ___SKINNY_LOOP */ + +#else /* ___ENABLE_DWORD_CAST */ + +#define PERMUTATION_TK3(c0Val, c1Val) \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK3) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x0^x6 x7 x6 x5 x4 x3 x2 x1) */ \ + w0 = ((w0 >> 1) & 0x7f7f7f7f) ^ \ + (((w0 << 7) ^ (w0 << 1)) & 0x80808080); \ + w1 = ((w1 >> 1) & 0x7f7f7f7f) ^ \ + (((w1 << 7) ^ (w1 << 1)) & 0x80808080); \ + \ + /* K3^AC(c0 c1) */ \ + /* store */ \ + *tk3++ = w0 ^ c0Val; \ + *tk3++ = w1 ^ ((uint32_t)c1Val << 8); \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys) +{ + uint32_t *tk3; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + + // odd + + // load master key + w0 = *(uint32_t*)&roundKeys[32]; + w1 = *(uint32_t*)&roundKeys[36]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[384]; +#else + tk3 = (uint32_t*)&roundKeys[512]; +#endif + + // 1st round + *tk3++ = w0 ^ 0x01; + *tk3++ = w1; + tk3 += 2; + + // 3rd,5th, ... ,37th,39th round + PERMUTATION_TK3(0x7, 0x0); + PERMUTATION_TK3(0xf, 0x1); + PERMUTATION_TK3(0xd, 0x3); + PERMUTATION_TK3(0x7, 0x3); + PERMUTATION_TK3(0xe, 0x1); + PERMUTATION_TK3(0x9, 0x3); + PERMUTATION_TK3(0x7, 0x2); + PERMUTATION_TK3(0xd, 0x1); + PERMUTATION_TK3(0x5, 0x3); + + PERMUTATION_TK3(0x6, 0x1); + PERMUTATION_TK3(0x8, 0x1); + PERMUTATION_TK3(0x1, 0x2); + PERMUTATION_TK3(0x5, 0x0); + PERMUTATION_TK3(0x7, 0x1); + PERMUTATION_TK3(0xc, 0x1); + PERMUTATION_TK3(0x1, 0x3); + PERMUTATION_TK3(0x6, 0x0); + PERMUTATION_TK3(0xb, 0x1); + PERMUTATION_TK3(0xd, 0x2); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41td,43th, ... ,53th,55th round + PERMUTATION_TK3(0x4, 0x3); + PERMUTATION_TK3(0x2, 0x1); + PERMUTATION_TK3(0x8, 0x0); + PERMUTATION_TK3(0x2, 0x2); + PERMUTATION_TK3(0x9, 0x0); + PERMUTATION_TK3(0x6, 0x2); + PERMUTATION_TK3(0x9, 0x1); + PERMUTATION_TK3(0x5, 0x2); + +#endif + + // even + + // load master key + w0 = *(uint32_t*)&roundKeys[40]; + w1 = *(uint32_t*)&roundKeys[44]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[392]; +#else + tk3 = (uint32_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... ,38th,40th round + PERMUTATION_TK3(0x3, 0x0); + PERMUTATION_TK3(0xf, 0x0); + PERMUTATION_TK3(0xe, 0x3); + PERMUTATION_TK3(0xb, 0x3); + PERMUTATION_TK3(0xf, 0x2); + PERMUTATION_TK3(0xc, 0x3); + PERMUTATION_TK3(0x3, 0x3); + PERMUTATION_TK3(0xe, 0x0); + PERMUTATION_TK3(0xa, 0x3); + PERMUTATION_TK3(0xb, 0x2); + + PERMUTATION_TK3(0xc, 0x2); + PERMUTATION_TK3(0x0, 0x3); + PERMUTATION_TK3(0x2, 0x0); + PERMUTATION_TK3(0xb, 0x0); + PERMUTATION_TK3(0xe, 0x2); + PERMUTATION_TK3(0x8, 0x3); + PERMUTATION_TK3(0x3, 0x2); + PERMUTATION_TK3(0xd, 0x0); + PERMUTATION_TK3(0x6, 0x3); + PERMUTATION_TK3(0xa, 0x1); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... 
,54th,56th round + PERMUTATION_TK3(0x9, 0x2); + PERMUTATION_TK3(0x4, 0x2); + PERMUTATION_TK3(0x1, 0x1); + PERMUTATION_TK3(0x4, 0x0); + PERMUTATION_TK3(0x3, 0x1); + PERMUTATION_TK3(0xc, 0x0); + PERMUTATION_TK3(0x2, 0x3); + PERMUTATION_TK3(0xa, 0x0); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys, unsigned char *pRC) +{ + uint32_t *tk3; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + uint32_t c0; + uint32_t c1; + + // odd + + // load master key + w0 = *(uint32_t*)&roundKeys[32]; + w1 = *(uint32_t*)&roundKeys[36]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[384]; +#else + tk3 = (uint32_t*)&roundKeys[512]; +#endif + + // 1st round + *tk3++ = w0 ^ 0x01; + *tk3++ = w1; + tk3 += 2; + + pRC += 4; + // 3rd,5th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + + // even + + // load master key + w0 = *(uint32_t*)&roundKeys[40]; + w1 = *(uint32_t*)&roundKeys[44]; + +#ifndef ___NUM_OF_ROUNDS_56 + pRC -= 78; + tk3 = (uint32_t*)&roundKeys[392]; +#else + pRC -= 110; + tk3 = (uint32_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + +} + +#endif /* ___SKINNY_LOOP */ + +#endif /* ___ENABLE_DWORD_CAST */ + diff --git a/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/skinny_main.c b/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/skinny_main.c new file mode 100644 index 0000000..8a6e75f --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusm1/opt32a_NEC/skinny_main.c @@ -0,0 +1,675 @@ +/****************************************************************************** + * Copyright (c) 2020, NEC Corporation. + * + * THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND. 
+ * + *****************************************************************************/ + +/* + * SKINNY-128-384 + * + * ART(TK1) -> store + * load AC(c0 c1) ^ TK3 ^ TK2 + * load TK1 + * calc AC(c0 c1) ^ TK3 ^ TK2 ^ TK1 -> use at (AC->ART) + * SC->SR->(AC->ART)->MC + * + * number of rounds : 40 or 56 + */ + +#include "skinny.h" + +/* + * S-BOX + */ +unsigned char SBOX[] += { + // Original + 0x65, 0x4c, 0x6a, 0x42, 0x4b, 0x63, 0x43, 0x6b, 0x55, 0x75, 0x5a, 0x7a, 0x53, 0x73, 0x5b, 0x7b, + 0x35, 0x8c, 0x3a, 0x81, 0x89, 0x33, 0x80, 0x3b, 0x95, 0x25, 0x98, 0x2a, 0x90, 0x23, 0x99, 0x2b, + 0xe5, 0xcc, 0xe8, 0xc1, 0xc9, 0xe0, 0xc0, 0xe9, 0xd5, 0xf5, 0xd8, 0xf8, 0xd0, 0xf0, 0xd9, 0xf9, + 0xa5, 0x1c, 0xa8, 0x12, 0x1b, 0xa0, 0x13, 0xa9, 0x05, 0xb5, 0x0a, 0xb8, 0x03, 0xb0, 0x0b, 0xb9, + 0x32, 0x88, 0x3c, 0x85, 0x8d, 0x34, 0x84, 0x3d, 0x91, 0x22, 0x9c, 0x2c, 0x94, 0x24, 0x9d, 0x2d, + 0x62, 0x4a, 0x6c, 0x45, 0x4d, 0x64, 0x44, 0x6d, 0x52, 0x72, 0x5c, 0x7c, 0x54, 0x74, 0x5d, 0x7d, + 0xa1, 0x1a, 0xac, 0x15, 0x1d, 0xa4, 0x14, 0xad, 0x02, 0xb1, 0x0c, 0xbc, 0x04, 0xb4, 0x0d, 0xbd, + 0xe1, 0xc8, 0xec, 0xc5, 0xcd, 0xe4, 0xc4, 0xed, 0xd1, 0xf1, 0xdc, 0xfc, 0xd4, 0xf4, 0xdd, 0xfd, + 0x36, 0x8e, 0x38, 0x82, 0x8b, 0x30, 0x83, 0x39, 0x96, 0x26, 0x9a, 0x28, 0x93, 0x20, 0x9b, 0x29, + 0x66, 0x4e, 0x68, 0x41, 0x49, 0x60, 0x40, 0x69, 0x56, 0x76, 0x58, 0x78, 0x50, 0x70, 0x59, 0x79, + 0xa6, 0x1e, 0xaa, 0x11, 0x19, 0xa3, 0x10, 0xab, 0x06, 0xb6, 0x08, 0xba, 0x00, 0xb3, 0x09, 0xbb, + 0xe6, 0xce, 0xea, 0xc2, 0xcb, 0xe3, 0xc3, 0xeb, 0xd6, 0xf6, 0xda, 0xfa, 0xd3, 0xf3, 0xdb, 0xfb, + 0x31, 0x8a, 0x3e, 0x86, 0x8f, 0x37, 0x87, 0x3f, 0x92, 0x21, 0x9e, 0x2e, 0x97, 0x27, 0x9f, 0x2f, + 0x61, 0x48, 0x6e, 0x46, 0x4f, 0x67, 0x47, 0x6f, 0x51, 0x71, 0x5e, 0x7e, 0x57, 0x77, 0x5f, 0x7f, + 0xa2, 0x18, 0xae, 0x16, 0x1f, 0xa7, 0x17, 0xaf, 0x01, 0xb2, 0x0e, 0xbe, 0x07, 0xb7, 0x0f, 0xbf, + 0xe2, 0xca, 0xee, 0xc6, 0xcf, 0xe7, 0xc7, 0xef, 0xd2, 0xf2, 0xde, 0xfe, 0xd7, 0xf7, 0xdf, 0xff, +}; + + /* + * S-BOX ^ AC(c2) + */ +unsigned char SBOX2[] += { // Original ^ c2(0x02) + 0x67, 0x4e, 0x68, 0x40, 0x49, 0x61, 0x41, 0x69, 0x57, 0x77, 0x58, 0x78, 0x51, 0x71, 0x59, 0x79, + 0x37, 0x8e, 0x38, 0x83, 0x8b, 0x31, 0x82, 0x39, 0x97, 0x27, 0x9a, 0x28, 0x92, 0x21, 0x9b, 0x29, + 0xe7, 0xce, 0xea, 0xc3, 0xcb, 0xe2, 0xc2, 0xeb, 0xd7, 0xf7, 0xda, 0xfa, 0xd2, 0xf2, 0xdb, 0xfb, + 0xa7, 0x1e, 0xaa, 0x10, 0x19, 0xa2, 0x11, 0xab, 0x07, 0xb7, 0x08, 0xba, 0x01, 0xb2, 0x09, 0xbb, + 0x30, 0x8a, 0x3e, 0x87, 0x8f, 0x36, 0x86, 0x3f, 0x93, 0x20, 0x9e, 0x2e, 0x96, 0x26, 0x9f, 0x2f, + 0x60, 0x48, 0x6e, 0x47, 0x4f, 0x66, 0x46, 0x6f, 0x50, 0x70, 0x5e, 0x7e, 0x56, 0x76, 0x5f, 0x7f, + 0xa3, 0x18, 0xae, 0x17, 0x1f, 0xa6, 0x16, 0xaf, 0x00, 0xb3, 0x0e, 0xbe, 0x06, 0xb6, 0x0f, 0xbf, + 0xe3, 0xca, 0xee, 0xc7, 0xcf, 0xe6, 0xc6, 0xef, 0xd3, 0xf3, 0xde, 0xfe, 0xd6, 0xf6, 0xdf, 0xff, + 0x34, 0x8c, 0x3a, 0x80, 0x89, 0x32, 0x81, 0x3b, 0x94, 0x24, 0x98, 0x2a, 0x91, 0x22, 0x99, 0x2b, + 0x64, 0x4c, 0x6a, 0x43, 0x4b, 0x62, 0x42, 0x6b, 0x54, 0x74, 0x5a, 0x7a, 0x52, 0x72, 0x5b, 0x7b, + 0xa4, 0x1c, 0xa8, 0x13, 0x1b, 0xa1, 0x12, 0xa9, 0x04, 0xb4, 0x0a, 0xb8, 0x02, 0xb1, 0x0b, 0xb9, + 0xe4, 0xcc, 0xe8, 0xc0, 0xc9, 0xe1, 0xc1, 0xe9, 0xd4, 0xf4, 0xd8, 0xf8, 0xd1, 0xf1, 0xd9, 0xf9, + 0x33, 0x88, 0x3c, 0x84, 0x8d, 0x35, 0x85, 0x3d, 0x90, 0x23, 0x9c, 0x2c, 0x95, 0x25, 0x9d, 0x2d, + 0x63, 0x4a, 0x6c, 0x44, 0x4d, 0x65, 0x45, 0x6d, 0x53, 0x73, 0x5c, 0x7c, 0x55, 0x75, 0x5d, 0x7d, + 0xa0, 0x1a, 0xac, 0x14, 0x1d, 0xa5, 0x15, 0xad, 0x03, 0xb0, 0x0c, 0xbc, 0x05, 0xb5, 0x0d, 0xbd, + 0xe0, 0xc8, 0xec, 0xc4, 0xcd, 0xe5, 
0xc5, 0xed, 0xd0, 0xf0, 0xdc, 0xfc, 0xd5, 0xf5, 0xdd, 0xfd, +}; + +#ifdef ___SKINNY_LOOP +/* + * Round Constants + */ +unsigned char RC[] += { + 0x01, 0x00, 0x03, 0x00, 0x07, 0x00, 0x0f, 0x00, 0x0f, 0x01, 0x0e, 0x03, 0x0d, 0x03, 0x0b, 0x03, + 0x07, 0x03, 0x0f, 0x02, 0x0e, 0x01, 0x0c, 0x03, 0x09, 0x03, 0x03, 0x03, 0x07, 0x02, 0x0e, 0x00, + 0x0d, 0x01, 0x0a, 0x03, 0x05, 0x03, 0x0b, 0x02, 0x06, 0x01, 0x0c, 0x02, 0x08, 0x01, 0x00, 0x03, + 0x01, 0x02, 0x02, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x07, 0x01, 0x0e, 0x02, 0x0c, 0x01, 0x08, 0x03, + 0x01, 0x03, 0x03, 0x02, 0x06, 0x00, 0x0d, 0x00, 0x0b, 0x01, 0x06, 0x03, 0x0d, 0x02, 0x0a, 0x01, +#ifdef ___NUM_OF_ROUNDS_56 + 0x04, 0x03, 0x09, 0x02, 0x02, 0x01, 0x04, 0x02, 0x08, 0x00, 0x01, 0x01, 0x02, 0x02, 0x04, 0x00, + 0x09, 0x00, 0x03, 0x01, 0x06, 0x02, 0x0c, 0x00, 0x09, 0x01, 0x02, 0x03, 0x05, 0x02, 0x0a, 0x00, +#endif + }; +#endif + +extern void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2); +extern void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys); +#ifdef ___SKINNY_LOOP +extern void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys, unsigned char *pRC); +#else +extern void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys); +#endif + +void skinny_128_384_enc123_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K) +{ + uint32_t *pt = (uint32_t*)&pskinny_ctrl->roundKeys[0]; + + pt[0] = *(uint32_t*)(&CNT[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + pt[4] = *(uint32_t*)(&T[0]); + pack_word(T[7], T[4], T[5], T[6], pt[5]); + pt[6] = *(uint32_t*)(&T[8]); + pack_word(T[15], T[12], T[13], T[14], pt[7]); + + pt[8] = *(uint32_t*)(&K[0]); + pack_word(K[7], K[4], K[5], K[6], pt[9]); + pt[10] = *(uint32_t*)(&K[8]); + pack_word(K[15], K[12], K[13], K[14], pt[11]); + +#ifdef ___SKINNY_LOOP + RunEncryptionKeyScheduleTK3(pskinny_ctrl->roundKeys, RC); +#else + RunEncryptionKeyScheduleTK3(pskinny_ctrl->roundKeys); +#endif + RunEncryptionKeyScheduleTK2(pskinny_ctrl->roundKeys); + Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2); + + pskinny_ctrl->func_skinny_128_384_enc = skinny_128_384_enc12_12; + +} + +void skinny_128_384_enc12_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K) +{ + (void)K; + + uint32_t *pt = &pskinny_ctrl->roundKeys[0]; + + pt[0] = *(uint32_t*)(&CNT[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + pt[4] = *(uint32_t*)(&T[0]); + pack_word(T[7], T[4], T[5], T[6], pt[5]); + pt[6] = *(uint32_t*)(&T[8]); + pack_word(T[15], T[12], T[13], T[14], pt[7]); + + RunEncryptionKeyScheduleTK2(pskinny_ctrl->roundKeys); + Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2); + +} + +extern void skinny_128_384_enc1_1 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K) +{ + (void)T; + (void)K; + + uint32_t *pt = &pskinny_ctrl->roundKeys[0]; + + pt[0] = *(uint32_t*)(&CNT[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2); + +} + +#define PERMUTATION_TK1() \ + \ +/* permutation */ \ +{ \ + unsigned char tmp0 = roundKeys[0]; \ + unsigned char tmp1 = roundKeys[1]; \ + unsigned char tmp2 = roundKeys[2]; \ + unsigned char tmp3 = roundKeys[3]; \ + unsigned char tmp4 = roundKeys[4]; \ + unsigned char tmp5 = roundKeys[5]; \ + unsigned char tmp6 = roundKeys[6]; \ + unsigned char tmp7 = roundKeys[7]; \ + \ + unsigned char* dst = 
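
Two observations on skinny_main.c up to this point. First, SBOX2[] is SBOX[] with 0x02 XORed into every entry, i.e. the fixed round-constant bit c2 is folded into the table for the one state cell that receives it, as the table comment indicates. Second, the three entry points realise an incremental tweakey schedule: skinny_128_384_enc123_12 expands TK2 and TK3 (first call with fresh key material), skinny_128_384_enc12_12 re-expands only TK2 (the T input changed), and skinny_128_384_enc1_1 only reloads the counter block into TK1, which is why the mode code swaps the func_skinny_128_384_enc pointer when it moves from the AD phase to the encryption phase. A quick self-check of the table relation; check_sbox2 is our name, sketch only:

#include <assert.h>

/* Sanity check: every SBOX2 entry equals the SBOX entry XOR 0x02. */
static void check_sbox2 (const unsigned char* sb, const unsigned char* sb2) {
  for (int i = 0; i < 256; i++)
    assert(sb2[i] == (unsigned char)(sb[i] ^ 0x02));
}
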
&roundKeys[8]; \ + \ + /* 5 7 2 3 6 0 4 1 */ \ + *dst++ = tmp1; \ + *dst++ = tmp4; \ + *dst++ = tmp0; \ + *dst++ = tmp6; \ + *dst++ = tmp3; \ + *dst++ = tmp2; \ + *dst++ = tmp7; \ + *dst++ = tmp5; \ + \ + /* 2 5 0 6 7 1 3 4 */ \ + *dst++ = tmp4; \ + *dst++ = tmp3; \ + *dst++ = tmp1; \ + *dst++ = tmp7; \ + *dst++ = tmp6; \ + *dst++ = tmp0; \ + *dst++ = tmp5; \ + *dst++ = tmp2; \ + \ + /* 0 2 1 7 5 4 6 3 */ \ + *dst++ = tmp3; \ + *dst++ = tmp6; \ + *dst++ = tmp4; \ + *dst++ = tmp5; \ + *dst++ = tmp7; \ + *dst++ = tmp1; \ + *dst++ = tmp2; \ + *dst++ = tmp0; \ + \ + /* 1 0 4 5 2 3 7 6 */ \ + *dst++ = tmp6; \ + *dst++ = tmp7; \ + *dst++ = tmp3; \ + *dst++ = tmp2; \ + *dst++ = tmp5; \ + *dst++ = tmp4; \ + *dst++ = tmp0; \ + *dst++ = tmp1; \ + \ + /* 4 1 3 2 0 6 5 7 */ \ + *dst++ = tmp7; \ + *dst++ = tmp5; \ + *dst++ = tmp6; \ + *dst++ = tmp0; \ + *dst++ = tmp2; \ + *dst++ = tmp3; \ + *dst++ = tmp1; \ + *dst++ = tmp4; \ + \ + /* 3 4 6 0 1 7 2 5 */ \ + *dst++ = tmp5; \ + *dst++ = tmp2; \ + *dst++ = tmp7; \ + *dst++ = tmp1; \ + *dst++ = tmp0; \ + *dst++ = tmp6; \ + *dst++ = tmp4; \ + *dst++ = tmp3; \ + \ + /* 6 3 7 1 4 5 0 2 */ \ + *dst++ = tmp2; \ + *dst++ = tmp0; \ + *dst++ = tmp5; \ + *dst++ = tmp4; \ + *dst++ = tmp1; \ + *dst++ = tmp7; \ + *dst++ = tmp3; \ + *dst++ = tmp6; \ +} + +#define SBOX_0(b0, b1, b2, b3) \ + \ + t0 = sbox[b0]; \ + t1 = sbox[b1]; \ + t2 = sbox[b2]; \ + t3 = sbox[b3]; \ + \ + b0 = (uint8_t)t0; \ + b1 = (uint8_t)t1; \ + b2 = (uint8_t)t2; \ + b3 = (uint8_t)t3; + +#define SBOX_8(b0, b1, b2, b3) \ + \ + t0 = sbox[b0]; \ + t1 = sbox[b1]; \ + t2 = sbox[b2]; \ + t3 = sbox[b3]; \ + \ + b0 = (uint8_t)t3; \ + b1 = (uint8_t)t0; \ + b2 = (uint8_t)t1; \ + b3 = (uint8_t)t2; + +#define SBOX_16(b0, b1, b2, b3) \ + \ + t0 = sbox2[b0]; /* AC(c2) */ \ + t1 = sbox[b1]; \ + t2 = sbox[b2]; \ + t3 = sbox[b3]; \ + \ + b0 = (uint8_t)t2; \ + b1 = (uint8_t)t3; \ + b2 = (uint8_t)t0; \ + b3 = (uint8_t)t1; + +#define SBOX_24(b0, b1, b2, b3) \ + \ + t0 = sbox[b0]; \ + t1 = sbox[b1]; \ + t2 = sbox[b2]; \ + t3 = sbox[b3]; \ + \ + b0 = (uint8_t)t1; \ + b1 = (uint8_t)t2; \ + b2 = (uint8_t)t3; \ + b3 = (uint8_t)t0; + +#ifdef ___ENABLE_DWORD_CAST + +#define SKINNY_MAIN() \ +{ \ + \ + /* odd */ \ + \ + /* LUT(with ShiftRows & AC(c2))*/ \ + \ + SBOX_0( block[0], block[1], block[2], block[3]); \ + SBOX_8( block[4], block[5], block[6], block[7]); \ + SBOX_16(block[8], block[9], block[10], block[11]); \ + SBOX_24(block[12], block[13], block[14], block[15]); \ + \ + /* TK1^TK2^TK3^AC(c0 c1) */ \ + \ + t1 = *(uint64_t*)&block[0]; \ + t1 ^= *tk1++; \ + t1 ^= *tk2++; \ + \ + /* MC */ \ + \ + t2 = *(uint64_t*)&block[8]; \ + t0 = t2 >> 32; \ + \ + /* 0^2 */ \ + t3 = t1 ^ t2; \ + \ + /* 1^2 */ \ + t2 = (t1 >> 32) ^ t2; \ + \ + /* 0^2^3 */ \ + t0 = t0 ^ t3; \ + \ + *(uint32_t*)&block[0] = (uint32_t)t0; \ + *(uint32_t*)&block[4] = (uint32_t)t1; \ + *(uint32_t*)&block[8] = (uint32_t)t2; \ + *(uint32_t*)&block[12] = (uint32_t)t3; \ + \ + /* even */ \ + \ + /* LUT(with ShiftRows & AC(c2))*/ \ + \ + SBOX_0( block[0], block[1], block[2], block[3]); \ + SBOX_8( block[4], block[5], block[6], block[7]); \ + SBOX_16(block[8], block[9], block[10], block[11]); \ + SBOX_24(block[12], block[13], block[14], block[15]); \ + \ + /* TK2^TK3^AC(c0 c1) */ \ + \ + t1 = *(uint64_t*)&block[0]; \ + t1 ^= *tk2++; \ + \ + /* MC */ \ + \ + t2 = *(uint64_t*)&block[8]; \ + t0 = t2 >> 32; \ + \ + /* 0^2 */ \ + t3 = t1 ^ t2; \ + \ + /* 1^2 */ \ + t2 = (t1 >> 32) ^ t2; \ + \ + /* 0^2^3 */ \ + t0 = t0 ^ t3; \ + \ + *(uint32_t*)&block[0] = (uint32_t)t0; \ 
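
PERMUTATION_TK1 above unrolls seven applications of the same byte permutation as in skinny.h (output byte i takes input byte P8[i], with P8 = {1, 4, 0, 6, 3, 2, 7, 5}), filling roundKeys[8..63] with the TK1 half-blocks used in the odd rounds; the permutation is a single 8-cycle, so eight stored copies repeat for the whole cipher and Encrypt() simply resets its tk1 pointer every sixteen rounds. TK1 is XORed only in the odd half of SKINNY_MAIN; our reading, inferred from the code rather than stated in it, is that only the first eight bytes of TK1 (counter plus domain byte) are non-zero in this mode, and the tweakey permutation brings the zero half into the added rows on even rounds. The unrolled macro is equivalent to this loop (sketch only, names ours):

/* Loop form of PERMUTATION_TK1: each 8-byte block is the previous block
 * pushed through the byte permutation P8. */
static void permute_tk1_ref (unsigned char* roundKeys) {
  static const unsigned char P8[8] = { 1, 4, 0, 6, 3, 2, 7, 5 };
  for (int r = 1; r < 8; r++)
    for (int i = 0; i < 8; i++)
      roundKeys[8 * r + i] = roundKeys[8 * (r - 1) + P8[i]];
}
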
+ *(uint32_t*)&block[4] = (uint32_t)t1; \ + *(uint32_t*)&block[8] = (uint32_t)t2; \ + *(uint32_t*)&block[12] = (uint32_t)t3; \ +} + +#ifndef ___SKINNY_LOOP + +void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint64_t *tk1; + uint64_t *tk2; + uint64_t t0; // used in MACRO + uint64_t t1; // used in MACRO + uint64_t t2; // used in MACRO + uint64_t t3; // used in MACRO + +// TK1 + + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + + tk2 = (uint64_t*)&roundKeys[64]; + tk1 = (uint64_t*)&roundKeys[0]; + + // 1st, ...,16th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint64_t*)&roundKeys[0]; + + // 17th, ...,32th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint64_t*)&roundKeys[0]; + + // 33th, ...,40th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41th, ...,48th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint64_t*)&roundKeys[0]; + + // 49th, ... ,56th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint64_t *tk1; + uint64_t *tk2; + uint64_t t0; // used in MACRO + uint64_t t1; // used in MACRO + uint64_t t2; // used in MACRO + uint64_t t3; // used in MACRO + +// TK1 + + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + + tk2 = (uint64_t*)&roundKeys[64]; + + // 1st, ... ,32th or 48th round +#ifndef ___NUM_OF_ROUNDS_56 + for(int j=0;j<2;j++) +#else + for(int j=0;j<3;j++) +#endif + { + tk1 = (uint64_t*)&roundKeys[0]; + for(int i=0;i<8;i++) + { + SKINNY_MAIN(); + } + } + + // 33th , ... ,40th or 49th, .... 
,56th round + { + tk1 = (uint64_t*)&roundKeys[0]; + for(int i=0;i<4;i++) + { + SKINNY_MAIN(); + } + } +} + +#endif /* ___SKINNY_LOOP */ + +#else /* ___ENABLE_DWORD_CAST */ + +#define SKINNY_MAIN() \ +{ \ + \ + /* odd */ \ + \ + /* LUT(with ShiftRows & AC(c2))*/ \ + \ + SBOX_0( block[0], block[1], block[2], block[3]); \ + SBOX_8( block[4], block[5], block[6], block[7]); \ + SBOX_16(block[8], block[9], block[10], block[11]); \ + SBOX_24(block[12], block[13], block[14], block[15]); \ + \ + /* TK1^TK2^TK3^AC(c0 c1) */ \ + \ + t1 = *(uint32_t*)&block[0]; \ + t0 = *(uint32_t*)&block[4]; \ + t1 ^= *tk1++; \ + t1 ^= *tk2++; \ + t0 ^= *tk1++; \ + t0 ^= *tk2++; \ + \ + /* MC */ \ + \ + t2 = *(uint32_t*)&block[8]; \ + t4 = *(uint32_t*)&block[12]; \ + \ + /* 0^2 */ \ + t3 = t1 ^ t2; \ + \ + /* 1^2 */ \ + t2 = t0 ^ t2; \ + \ + /* 0^2^3 */ \ + t0 = t3 ^ t4; \ + \ + *(uint32_t*)&block[0] = t0; \ + *(uint32_t*)&block[4] = t1; \ + *(uint32_t*)&block[8] = t2; \ + *(uint32_t*)&block[12] = t3; \ + \ + /* even */ \ + \ + /* LUT(with ShiftRows & AC(c2))*/ \ + \ + SBOX_0( block[0], block[1], block[2], block[3]); \ + SBOX_8( block[4], block[5], block[6], block[7]); \ + SBOX_16(block[8], block[9], block[10], block[11]); \ + SBOX_24(block[12], block[13], block[14], block[15]); \ + \ + /* TK2^TK3^AC(c0 c1) */ \ + \ + t1 = *(uint32_t*)&block[0]; \ + t0 = *(uint32_t*)&block[4]; \ + t1 ^= *tk2++; \ + t0 ^= *tk2++; \ + \ + /* MC */ \ + \ + t2 = *(uint32_t*)&block[8]; \ + t4 = *(uint32_t*)&block[12]; \ + \ + /* 0^2 */ \ + t3 = t1 ^ t2; \ + \ + /* 1^2 */ \ + t2 = t0 ^ t2; \ + \ + /* 0^2^3 */ \ + t0 = t3 ^ t4; \ + \ + *(uint32_t*)&block[0] = t0; \ + *(uint32_t*)&block[4] = t1; \ + *(uint32_t*)&block[8] = t2; \ + *(uint32_t*)&block[12] = t3; \ +} + +#ifndef ___SKINNY_LOOP + +void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint32_t *tk1; + uint32_t *tk2; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t t3; // used in MACRO + uint32_t t4; // used in MACRO + +// TK1 + + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + + tk2 = (uint32_t*)&roundKeys[64]; + tk1 = (uint32_t*)&roundKeys[0]; + + // 1st, ...,16th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint32_t*)&roundKeys[0]; + + // 17th, ...,32th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint32_t*)&roundKeys[0]; + + // 33th, ...,40th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41th, ...,48th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint32_t*)&roundKeys[0]; + + // 49th, ... ,56th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint32_t *tk1; + uint32_t *tk2; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t t3; // used in MACRO + uint32_t t4; // used in MACRO + +// TK1 + + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + + tk2 = (uint32_t*)&roundKeys[64]; + + // 1st, ... 
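
Inside SKINNY_MAIN, the four SBOX_* macros perform SubCells and ShiftRows in one pass: each row is looked up through SBOX (SBOX2 for the single cell that also receives the c2 constant) and written back rotated right by 0, 1, 2 or 3 byte positions. The word XORs that follow are SKINNY's binary MixColumns: with rows r0..r3 taken after the tweakey addition, the new state is r0' = r0 ^ r2 ^ r3, r1' = r0, r2' = r1 ^ r2, r3' = r0 ^ r2, which is what the t0..t4 shuffling computes in both the 64-bit and 32-bit variants. A row-wise reference; skinny_mc_ref is our name, sketch only:

#include <stdint.h>

/* Illustrative MixColumns on four 32-bit row words (r[0] = state bytes 0..3). */
static void skinny_mc_ref (uint32_t r[4]) {
  uint32_t r0 = r[0], r1 = r[1], r2 = r[2], r3 = r[3];
  r[0] = r0 ^ r2 ^ r3;
  r[1] = r0;
  r[2] = r1 ^ r2;
  r[3] = r0 ^ r2;
}
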
,32th or 48th round +#ifndef ___NUM_OF_ROUNDS_56 + for(int j=0;j<2;j++) +#else + for(int j=0;j<3;j++) +#endif + { + tk1 = (uint32_t*)&roundKeys[0]; + for(int i=0;i<8;i++) + { + SKINNY_MAIN(); + } + } + + // 33th , ... ,40th or 49th, .... ,56th round + { + tk1 = (uint32_t*)&roundKeys[0]; + for(int i=0;i<4;i++) + { + SKINNY_MAIN(); + } + } +} + +#endif /* ___SKINNY_LOOP */ + +#endif /* ___ENABLE_DWORD_CAST */ + diff --git a/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/api.h b/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/api.h new file mode 100644 index 0000000..a4aa567 --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 diff --git a/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/encrypt.c b/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/encrypt.c new file mode 100644 index 0000000..f329721 --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/encrypt.c @@ -0,0 +1,1136 @@ +/* + * Date: 29 November 2018 + * Contact: Thomas Peyrin - thomas.peyrin@gmail.com + * Mustafa Khairallah - mustafam001@e.ntu.edu.sg + */ + +#include "crypto_aead.h" +#include "api.h" +#include "skinny.h" +#include +#include + +void pad (const unsigned char* m, unsigned char* mp, int len8) { + +#ifdef ___ENABLE_DWORD_CAST + + if (0 == len8) { + *(uint64_t*)(&mp[0]) = 0; + *(uint64_t*)(&mp[8]) = 0; + } else if (8 > len8) { + *(uint64_t*)(&mp[0]) = *(uint64_t*)(&m[0]) & (0xffffffffffffffff >> (64 - len8*8)); + *(uint64_t*)(&mp[8]) = 0; + mp[15] = len8; + } else if (8 == len8) { + *(uint64_t*)(&mp[0]) = *(uint64_t*)(&m[0]); + *(uint64_t*)(&mp[8]) = 0; + mp[15] = 8; + } else if (16 > len8) { + *(uint64_t*)(&mp[0]) = *(uint64_t*)(&m[0]); + *(uint64_t*)(&mp[8]) = *(uint64_t*)(&m[8]) & (0xffffffffffffffff >> (128 - len8*8)); + mp[15] = len8; + } else { + *(uint64_t*)(&mp[0]) = *(uint64_t*)(&m[0]); + *(uint64_t*)(&mp[8]) = *(uint64_t*)(&m[8]); + } + +#else + + if (0 == len8) { + *(uint32_t*)(&mp[0]) = 0; + *(uint32_t*)(&mp[4]) = 0; + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + } else if (4 > len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]) & (0xffffffff >> (32 - len8*8)); + *(uint32_t*)(&mp[4]) = 0; + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + mp[15] = len8; + } else if (4 == len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = 0; + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + mp[15] = 4; + } else if (8 > len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]) & (0xffffffff >> (64 - len8*8)); + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + mp[15] = len8; + } else if (8 == len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + mp[15] = 8; + } else if (12 > len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = *(uint32_t*)(&m[8]) & (0xffffffff >> (96 - len8*8)); + *(uint32_t*)(&mp[12]) = 0; + mp[15] = len8; + } else if (12 == len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = *(uint32_t*)(&m[8]); + *(uint32_t*)(&mp[12]) = 0; + mp[15] = 12; + } else if (16 > len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + 
*(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = *(uint32_t*)(&m[8]); + *(uint32_t*)(&mp[12]) = *(uint32_t*)(&m[12]) & (0xffffffff >> (128 - len8*8)); + mp[15] = len8; + } else { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = *(uint32_t*)(&m[8]); + *(uint32_t*)(&mp[12]) = *(uint32_t*)(&m[12]); + } + +#endif + +} + +void g8A (unsigned char* s, unsigned char* c) { + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t s0 = *(uint64_t*)(&s[0]); + uint64_t s1 = *(uint64_t*)(&s[8]); + + uint64_t c0, c1; + + c0 = ((s0 >> 1) & 0x7f7f7f7f7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x8080808080808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x8080808080808080); + + *(uint64_t*)(&c[0]) = c0; + *(uint64_t*)(&c[8]) = c1; + +#else + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t c0, c1, c2, c3; + + c0 = ((s0 >> 1) & 0x7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x80808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x80808080); + c2 = ((s2 >> 1) & 0x7f7f7f7f) ^ ((s2 ^ (s2 << 7)) & 0x80808080); + c3 = ((s3 >> 1) & 0x7f7f7f7f) ^ ((s3 ^ (s3 << 7)) & 0x80808080); + + *(uint32_t*)(&c[0]) = c0; + *(uint32_t*)(&c[4]) = c1; + *(uint32_t*)(&c[8]) = c2; + *(uint32_t*)(&c[12]) = c3; + +#endif + +} + +void g8A_for_Tag_Generation (unsigned char* s, unsigned char* c) { + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t s0 = *(uint64_t*)(&s[0]); + uint64_t s1 = *(uint64_t*)(&s[8]); + + uint64_t c0, c1; + + c0 = ((s0 >> 1) & 0x7f7f7f7f7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x8080808080808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x8080808080808080); + + // use byte access because of memory alignment. + // c is not always in word(4 byte) alignment. + c[0] = c0 &0xFF; + c[1] = (c0>>8) &0xFF; + c[2] = (c0>>16)&0xFF; + c[3] = (c0>>24)&0xFF; + c[4] = (c0>>32)&0xFF; + c[5] = (c0>>40)&0xFF; + c[6] = (c0>>48)&0xFF; + c[7] = c0>>56; + c[8] = c1 &0xFF; + c[9] = (c1>>8) &0xFF; + c[10] = (c1>>16)&0xFF; + c[11] = (c1>>24)&0xFF; + c[12] = (c1>>32)&0xFF; + c[13] = (c1>>40)&0xFF; + c[14] = (c1>>48)&0xFF; + c[15] = c1>>56; + +#else + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t c0, c1, c2, c3; + + c0 = ((s0 >> 1) & 0x7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x80808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x80808080); + c2 = ((s2 >> 1) & 0x7f7f7f7f) ^ ((s2 ^ (s2 << 7)) & 0x80808080); + c3 = ((s3 >> 1) & 0x7f7f7f7f) ^ ((s3 ^ (s3 << 7)) & 0x80808080); + + // use byte access because of memory alignment. + // c is not always in word(4 byte) alignment. 
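+  // Illustrative note (not part of the reference code): byte for byte, the
+  // word-sliced update above is the same G feedback function used by g8A; a
+  // plain byte-wise equivalent would be
+  //
+  //   for (int i = 0; i < 16; i++)
+  //     c[i] = (unsigned char)((s[i] >> 1) ^ ((s[i] ^ (s[i] << 7)) & 0x80));
+  //
+  // i.e. each byte (x7 x6 x5 x4 x3 x2 x1 x0) maps to
+  // (x0^x7 x7 x6 x5 x4 x3 x2 x1).  The 0x7f7f7f7f / 0x80808080 masks simply
+  // apply this to four bytes of the state per word operation.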
+ c[0] = c0 &0xFF; + c[1] = (c0>>8) &0xFF; + c[2] = (c0>>16)&0xFF; + c[3] = c0>>24; + c[4] = c1 &0xFF; + c[5] = (c1>>8) &0xFF; + c[6] = (c1>>16)&0xFF; + c[7] = c1>>24; + c[8] = c2 &0xFF; + c[9] = (c2>>8) &0xFF; + c[10] = (c2>>16)&0xFF; + c[11] = c2>>24; + c[12] = c3 &0xFF; + c[13] = (c3>>8) &0xFF; + c[14] = (c3>>16)&0xFF; + c[15] = c3>>24; + +#endif + +} + +void rho_ad_eqov16 ( + const unsigned char* m, + unsigned char* s) { + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) ^= *(uint64_t*)(&m[0]); + *(uint64_t*)(&s[8]) ^= *(uint64_t*)(&m[8]); + +#else + + *(uint32_t*)(&s[0]) ^= *(uint32_t*)(&m[0]); + *(uint32_t*)(&s[4]) ^= *(uint32_t*)(&m[4]); + *(uint32_t*)(&s[8]) ^= *(uint32_t*)(&m[8]); + *(uint32_t*)(&s[12]) ^= *(uint32_t*)(&m[12]); + +#endif + +} + +void rho_ad_ud16 ( + const unsigned char* m, + unsigned char* s, + int len8) { + + unsigned char mp [16]; + pad(m,mp,len8); + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) ^= *(uint64_t*)(&mp[0]); + *(uint64_t*)(&s[8]) ^= *(uint64_t*)(&mp[8]); + +#else + + *(uint32_t*)(&s[0]) ^= *(uint32_t*)(&mp[0]); + *(uint32_t*)(&s[4]) ^= *(uint32_t*)(&mp[4]); + *(uint32_t*)(&s[8]) ^= *(uint32_t*)(&mp[8]); + *(uint32_t*)(&s[12]) ^= *(uint32_t*)(&mp[12]); + +#endif + +} + +void rho_eqov16 ( + const unsigned char* m, + unsigned char* c, + unsigned char* s) { + + g8A(s,c); + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t c0 = *(uint64_t*)(&c[0]); + uint64_t c1 = *(uint64_t*)(&c[8]); + + uint64_t s0 = *(uint64_t*)(&s[0]); + uint64_t s1 = *(uint64_t*)(&s[8]); + + uint64_t m0 = *(uint64_t*)(&m[0]); + uint64_t m1 = *(uint64_t*)(&m[8]); + + s0 ^= m0; + s1 ^= m1; + + c0 ^= m0; + c1 ^= m1; + + *(uint64_t*)(&s[0]) = s0; + *(uint64_t*)(&s[8]) = s1; + + *(uint64_t*)(&c[0]) = c0; + *(uint64_t*)(&c[8]) = c1; + +#else + + uint32_t c0 = *(uint32_t*)(&c[0]); + uint32_t c1 = *(uint32_t*)(&c[4]); + uint32_t c2 = *(uint32_t*)(&c[8]); + uint32_t c3 = *(uint32_t*)(&c[12]); + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t m0 = *(uint32_t*)(&m[0]); + uint32_t m1 = *(uint32_t*)(&m[4]); + uint32_t m2 = *(uint32_t*)(&m[8]); + uint32_t m3 = *(uint32_t*)(&m[12]); + + s0 ^= m0; + s1 ^= m1; + s2 ^= m2; + s3 ^= m3; + + c0 ^= m0; + c1 ^= m1; + c2 ^= m2; + c3 ^= m3; + + *(uint32_t*)(&s[0]) = s0; + *(uint32_t*)(&s[4]) = s1; + *(uint32_t*)(&s[8]) = s2; + *(uint32_t*)(&s[12]) = s3; + + *(uint32_t*)(&c[0]) = c0; + *(uint32_t*)(&c[4]) = c1; + *(uint32_t*)(&c[8]) = c2; + *(uint32_t*)(&c[12]) = c3; + +#endif + +} + +void rho_ud16 ( + const unsigned char* m, + unsigned char* c, + unsigned char* s, + int len8) { + + unsigned char mp [16]; + + pad(m,mp,len8); + + g8A(s,c); + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t mp0 = *(uint64_t*)&mp[0]; + uint64_t mp1 = *(uint64_t*)&mp[8]; + uint64_t c0 = *(uint64_t*)&c[0]; + uint64_t c1 = *(uint64_t*)&c[8]; + + *(uint64_t*)(&s[0]) ^= mp0; + *(uint64_t*)(&s[8]) ^= mp1; + + if (0 == len8) { + c0 = 0; + c1 = 0; + } else if (8 > len8) { + c0 = c0 ^ (mp0 & 0xffffffffffffffff >> (64 - (len8*8))); + c0 = c0 ^ (c0 & 0xffffffffffffffff << ( (len8*8))); + c1 = 0; + } else if (8 == len8) { + c0 = c0 ^ mp0; + c1 = 0; + } else if (16 > len8) { + len8 -= 8; + c0 = c0 ^ mp0; + c1 = c1 ^ (mp1 & 0xffffffffffffffff >> (64 - (len8*8))); + c1 = c1 ^ (c1 & 0xffffffffffffffff << ( (len8*8))); + } else { + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + } + + *(uint64_t*)&c[0] = c0; + *(uint64_t*)&c[8] = c1; + +#else + + uint32_t mp0 = *(uint32_t*)&mp[0]; + uint32_t mp1 = 
*(uint32_t*)&mp[4]; + uint32_t mp2 = *(uint32_t*)&mp[8]; + uint32_t mp3 = *(uint32_t*)&mp[12]; + uint32_t c0 = *(uint32_t*)&c[0]; + uint32_t c1 = *(uint32_t*)&c[4]; + uint32_t c2 = *(uint32_t*)&c[8]; + uint32_t c3 = *(uint32_t*)&c[12]; + + *(uint32_t*)(&s[0]) ^= mp0; + *(uint32_t*)(&s[4]) ^= mp1; + *(uint32_t*)(&s[8]) ^= mp2; + *(uint32_t*)(&s[12]) ^= mp3; + + if (0 == len8) { + c0 = 0; + c1 = 0; + c2 = 0; + c3 = 0; + } else if (4 > len8) { + c0 = c0 ^ (mp0 & 0xffffffff >> (32 - (len8*8))); + c0 = c0 ^ (c0 & 0xffffffff << ( (len8*8))); + c1 = 0; + c2 = 0; + c3 = 0; + } else if (4 == len8) { + c0 = c0 ^ mp0; + c1 = 0; + c2 = 0; + c3 = 0; + } else if (8 > len8) { + len8 -= 4; + c0 = c0 ^ mp0; + c1 = c1 ^ (mp1 & 0xffffffff >> (32 - (len8*8))); + c1 = c1 ^ (c1 & 0xffffffff << ( (len8*8))); + c2 = 0; + c3 = 0; + } else if (8 == len8) { + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = 0; + c3 = 0; + } else if (12 > len8) { + len8 -= 8; + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = c2 ^ (mp2 & 0xffffffff >> (32 - (len8*8))); + c2 = c2 ^ (c2 & 0xffffffff << ( (len8*8))); + c3 = 0; + } else if (12 == len8) { + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = c2 ^ mp2; + c3 = 0; + } else if (16 > len8) { + len8 -= 12; + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = c2 ^ mp2; + c3 = c3 ^ (mp3 & 0xffffffff >> (32 - (len8*8))); + c3 = c3 ^ (c3 & 0xffffffff << ( (len8*8))); + } else { + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = c2 ^ mp2; + c3 = c3 ^ mp3; + } + + *(uint32_t*)&c[0] = c0; + *(uint32_t*)&c[4] = c1; + *(uint32_t*)&c[8] = c2; + *(uint32_t*)&c[12] = c3; + +#endif + +} + +void irho_eqov16 ( + unsigned char* m, + const unsigned char* c, + unsigned char* s) { + + g8A(s,m); + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t c0 = *(uint64_t*)(&c[0]); + uint64_t c1 = *(uint64_t*)(&c[8]); + + uint64_t s0 = *(uint64_t*)(&s[0]); + uint64_t s1 = *(uint64_t*)(&s[8]); + + uint64_t m0 = *(uint64_t*)(&m[0]); + uint64_t m1 = *(uint64_t*)(&m[8]); + + s0 ^= c0 ^ m0; + s1 ^= c1 ^ m1; + + m0 ^= c0; + m1 ^= c1; + + *(uint64_t*)(&s[0]) = s0; + *(uint64_t*)(&s[8]) = s1; + + *(uint64_t*)(&m[0]) = m0; + *(uint64_t*)(&m[8]) = m1; + +#else + + uint32_t c0 = *(uint32_t*)(&c[0]); + uint32_t c1 = *(uint32_t*)(&c[4]); + uint32_t c2 = *(uint32_t*)(&c[8]); + uint32_t c3 = *(uint32_t*)(&c[12]); + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t m0 = *(uint32_t*)(&m[0]); + uint32_t m1 = *(uint32_t*)(&m[4]); + uint32_t m2 = *(uint32_t*)(&m[8]); + uint32_t m3 = *(uint32_t*)(&m[12]); + + s0 ^= c0 ^ m0; + s1 ^= c1 ^ m1; + s2 ^= c2 ^ m2; + s3 ^= c3 ^ m3; + + m0 ^= c0; + m1 ^= c1; + m2 ^= c2; + m3 ^= c3; + + *(uint32_t*)(&s[0]) = s0; + *(uint32_t*)(&s[4]) = s1; + *(uint32_t*)(&s[8]) = s2; + *(uint32_t*)(&s[12]) = s3; + + *(uint32_t*)(&m[0]) = m0; + *(uint32_t*)(&m[4]) = m1; + *(uint32_t*)(&m[8]) = m2; + *(uint32_t*)(&m[12]) = m3; + +#endif + +} + +void irho_ud16 ( + unsigned char* m, + const unsigned char* c, + unsigned char* s, + int len8) { + + unsigned char cp [16]; + + pad(c,cp,len8); + + g8A(s,m); + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t cp0 = *(uint64_t*)&cp[0]; + uint64_t cp1 = *(uint64_t*)&cp[8]; + uint64_t m0 = *(uint64_t*)&m[0]; + uint64_t m1 = *(uint64_t*)&m[8]; + uint64_t s0 = *(uint64_t*)&s[0]; + uint64_t s1 = *(uint64_t*)&s[8]; + + s0 ^= cp0; + s1 ^= cp1; + + if (0 == len8) { + m0 = 0; + m1 = 0; + } else if (8 > len8) { + s0 = s0 ^ (m0 & 0xffffffffffffffff >> (64 - (len8*8))); + + m0 = m0 ^ (cp0 & 0xffffffffffffffff >> (64 - (len8*8))); + m0 = 
m0 ^ (m0 & 0xffffffffffffffff << ( (len8*8))); + m1 = 0; + } else if (8 == len8) { + s0 = s0 ^ m0; + + m0 = m0 ^ cp0; + m1 = 0; + } else if (16 > len8) { + len8 -= 8; + s0 = s0 ^ m0; + s1 = s1 ^ (m1 & 0xffffffffffffffff >> (64 - (len8*8))); + + m0 = m0 ^ cp0; + m1 = m1 ^ (cp1 & 0xffffffffffffffff >> (64 - (len8*8))); + m1 = m1 ^ (m1 & 0xffffffffffffffff << ( (len8*8))); + } else { + s0 = s0 ^ m0; + s1 = s1 ^ m1; + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + } + + *(uint64_t*)&s[0] = s0; + *(uint64_t*)&s[8] = s1; + *(uint64_t*)&m[0] = m0; + *(uint64_t*)&m[8] = m1; + +#else + + uint32_t cp0 = *(uint32_t*)&cp[0]; + uint32_t cp1 = *(uint32_t*)&cp[4]; + uint32_t cp2 = *(uint32_t*)&cp[8]; + uint32_t cp3 = *(uint32_t*)&cp[12]; + uint32_t m0 = *(uint32_t*)&m[0]; + uint32_t m1 = *(uint32_t*)&m[4]; + uint32_t m2 = *(uint32_t*)&m[8]; + uint32_t m3 = *(uint32_t*)&m[12]; + uint32_t s0 = *(uint32_t*)&s[0]; + uint32_t s1 = *(uint32_t*)&s[4]; + uint32_t s2 = *(uint32_t*)&s[8]; + uint32_t s3 = *(uint32_t*)&s[12]; + + s0 ^= cp0; + s1 ^= cp1; + s2 ^= cp2; + s3 ^= cp3; + + if (0 == len8) { + m0 = 0; + m1 = 0; + m2 = 0; + m3 = 0; + } else if (4 > len8) { + s0 = s0 ^ (m0 & 0xffffffff >> (32 - (len8*8))); + + m0 = m0 ^ (cp0 & 0xffffffff >> (32 - (len8*8))); + m0 = m0 ^ (m0 & 0xffffffff << ( (len8*8))); + m1 = 0; + m2 = 0; + m3 = 0; + } else if (4 == len8) { + s0 = s0 ^ m0; + + m0 = m0 ^ cp0; + m1 = 0; + m2 = 0; + m3 = 0; + } else if (8 > len8) { + len8 -= 4; + s0 = s0 ^ m0; + s1 = s1 ^ (m1 & 0xffffffff >> (32 - (len8*8))); + + m0 = m0 ^ cp0; + m1 = m1 ^ (cp1 & 0xffffffff >> (32 - (len8*8))); + m1 = m1 ^ (m1 & 0xffffffff << ( (len8*8))); + m2 = 0; + m3 = 0; + } else if (8 == len8) { + s0 = s0 ^ m0; + s1 = s1 ^ m1; + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + m2 = 0; + m3 = 0; + } else if (12 > len8) { + len8 -= 8; + s0 = s0 ^ m0; + s1 = s1 ^ m1; + s2 = s2 ^ (m2 & 0xffffffff >> (32 - (len8*8))); + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + m2 = m2 ^ (cp2 & 0xffffffff >> (32 - (len8*8))); + m2 = m2 ^ (m2 & 0xffffffff << ( (len8*8))); + m3 = 0; + } else if (12 == len8) { + s0 = s0 ^ m0; + s1 = s1 ^ m1; + s2 = s2 ^ m2; + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + m2 = m2 ^ cp2; + m3 = 0; + } else if (16 > len8) { + len8 -= 12; + s0 = s0 ^ m0; + s1 = s1 ^ m1; + s2 = s2 ^ m2; + s3 = s3 ^ (m3 & 0xffffffff >> (32 - (len8*8))); + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + m2 = m2 ^ cp2; + m3 = m3 ^ (cp3 & 0xffffffff >> (32 - (len8*8))); + m3 = m3 ^ (m3 & 0xffffffff << ( (len8*8))); + } else { + s0 = s0 ^ m0; + s1 = s1 ^ m1; + s2 = s2 ^ m2; + s3 = s3 ^ m3; + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + m2 = m2 ^ cp2; + m3 = m3 ^ cp3; + } + + *(uint32_t*)&s[0] = s0; + *(uint32_t*)&s[4] = s1; + *(uint32_t*)&s[8] = s2; + *(uint32_t*)&s[12] = s3; + *(uint32_t*)&m[0] = m0; + *(uint32_t*)&m[4] = m1; + *(uint32_t*)&m[8] = m2; + *(uint32_t*)&m[12] = m3; + +#endif + +} + +void reset_lfsr_gf56 (unsigned char* CNT) { + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&CNT[0]) = 0x0000000000000001; // CNT7 CNT6 CNT5 CNT4 CNT3 CNT2 CNT1 CNT0 + +#else + + *(uint32_t*)(&CNT[0]) = 0x00000001; // CNT3 CNT2 CNT1 CNT0 + *(uint32_t*)(&CNT[4]) = 0x00000000; // CNT7 CNT6 CNT5 CNT4 + +#endif + +} + +void lfsr_gf56 (unsigned char* CNT) { + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t C0; + uint64_t fb0; + + C0 = *(uint64_t*)(&CNT[0]); // CNT7 CNT6 CNT5 CNT4 CNT3 CNT2 CNT1 CNT0 + + fb0 = 0; + if (CNT[6] & 0x80) { + fb0 = 0x95; + } + + C0 = C0 << 1 ^ fb0; + + *(uint64_t*)(&CNT[0]) = C0; + +#else + + uint32_t C0; + uint32_t C1; + uint32_t fb0; + + C0 = *(uint32_t*)(&CNT[0]); // CNT3 CNT2 CNT1 CNT0 + 
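+  // Illustrative note (not part of the reference code): CNT[0..6] is a 56-bit
+  // LFSR counter.  Shifting left by one and XORing 0x95 whenever bit 55
+  // (CNT[6] & 0x80) is set reduces modulo x^56 + x^7 + x^4 + x^2 + 1, since
+  // 0x95 encodes x^7 + x^4 + x^2 + 1; e.g. a state with only bit 55 set steps
+  // to 0x95 in the lowest byte.  In this 32-bit path the bit shifted out of
+  // the low word is carried into C1 below via C0 >> 31, and anything shifted
+  // above bit 55 is harmless because block_cipher() overwrites CNT[7] with
+  // the domain byte D before every SKINNY call.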
C1 = *(uint32_t*)(&CNT[4]); // CNT7 CNT6 CNT5 CNT4 + + fb0 = 0; + if (CNT[6] & 0x80) { + fb0 = 0x95; + } + + C1 = C1 << 1 | C0 >> 31; + C0 = C0 << 1 ^ fb0; + + *(uint32_t*)(&CNT[0]) = C0; + *(uint32_t*)(&CNT[4]) = C1; + +#endif + +} + +void block_cipher( + unsigned char* s, + const unsigned char* k, unsigned char* T, + unsigned char* CNT, unsigned char D, + skinny_ctrl* p_skinny_ctrl) { + + CNT[7] = D; + p_skinny_ctrl->func_skinny_128_384_enc(s, p_skinny_ctrl, CNT, T, k); + +} + +void nonce_encryption ( + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + skinny_ctrl* p_skinny_ctrl) { + + block_cipher(s,k,(unsigned char*)N,CNT,D,p_skinny_ctrl); + +} + +void generate_tag ( + unsigned char** c, unsigned char* s, + unsigned long long* clen) { + + g8A_for_Tag_Generation(s, *c); + + *c = *c + 16; + *c = *c - *clen; + +} + +unsigned long long msg_encryption_eqov16 ( + const unsigned char** M, unsigned char** c, + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long mlen, + skinny_ctrl* p_skinny_ctrl) { + + rho_eqov16(*M, *c, s); + *c = *c + 16; + *M = *M + 16; + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,D,p_skinny_ctrl); + return mlen - 16; + +} + +unsigned long long msg_encryption_ud16 ( + const unsigned char** M, unsigned char** c, + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long mlen, + skinny_ctrl* p_skinny_ctrl) { + +// char msg[64]; +// +// unsigned int st = (unsigned int )read_cycle(); + + rho_ud16(*M, *c, s, mlen); + +// unsigned int ed = (unsigned int )read_cycle(); +// sprintf(msg, "rho_ud16 %d\n", ed-st); +// SerialPuts(msg); +// +// fprint_bstr(NULL, "c = ", *c, 16); + + *c = *c + mlen; + *M = *M + mlen; + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,D,p_skinny_ctrl); + return 0; + +} + +unsigned long long msg_decryption_eqov16 ( + unsigned char** M, const unsigned char** c, + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long clen, + skinny_ctrl* p_skinny_ctrl) { + + irho_eqov16(*M, *c, s); + *c = *c + 16; + *M = *M + 16; + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,D,p_skinny_ctrl); + + return clen - 16; + +} + +unsigned long long msg_decryption_ud16 ( + unsigned char** M, const unsigned char** c, + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long clen, + skinny_ctrl* p_skinny_ctrl) { + + irho_ud16(*M, *c, s, clen); + *c = *c + clen; + *M = *M + clen; + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,D,p_skinny_ctrl); + return 0; + +} + +unsigned long long ad_encryption_eqov32 ( + const unsigned char** A, unsigned char* s, + const unsigned char* k, unsigned long long adlen, + unsigned char* CNT, + unsigned char D, + skinny_ctrl* p_skinny_ctrl) { + + unsigned char T [16]; + + rho_ad_eqov16(*A, s); + *A = *A + 16; + lfsr_gf56(CNT); + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&T[0]) = *(uint64_t*)(&(*A)[0]); + *(uint64_t*)(&T[8]) = *(uint64_t*)(&(*A)[8]); + +#else + + *(uint32_t*)(&T[0]) = *(uint32_t*)(&(*A)[0]); + *(uint32_t*)(&T[4]) = *(uint32_t*)(&(*A)[4]); + *(uint32_t*)(&T[8]) = *(uint32_t*)(&(*A)[8]); + *(uint32_t*)(&T[12]) = *(uint32_t*)(&(*A)[12]); + +#endif + + *A = *A + 16; + block_cipher(s,k,T,CNT,D,p_skinny_ctrl); + lfsr_gf56(CNT); + + return adlen - 32; + +} + +unsigned long long ad_encryption_ov16 
( + const unsigned char** A, unsigned char* s, + const unsigned char* k, unsigned long long adlen, + unsigned char* CNT, + unsigned char D, + skinny_ctrl* p_skinny_ctrl) { + + unsigned char T [16]; + + adlen = adlen - 16; + rho_ad_eqov16(*A, s); + *A = *A + 16; + lfsr_gf56(CNT); + + pad(*A, T, adlen); + *A = *A + adlen; + block_cipher(s,k,T,CNT,D,p_skinny_ctrl); + lfsr_gf56(CNT); + + return 0; + +} + +unsigned long long ad_encryption_eq16 ( + const unsigned char** A, unsigned char* s, + unsigned char* CNT) { + + rho_ad_eqov16(*A, s); + *A = *A + 16; + lfsr_gf56(CNT); + + return 0; + +} + +unsigned long long ad_encryption_ud16( + const unsigned char** A, unsigned char* s, + unsigned long long adlen, + unsigned char* CNT) { + + rho_ad_ud16(*A, s, adlen); + *A = *A + adlen; + lfsr_gf56(CNT); + + return 0; + +} + +int crypto_aead_encrypt ( + unsigned char* c, unsigned long long* clen, + const unsigned char* m, unsigned long long mlen, + const unsigned char* ad, unsigned long long adlen, + const unsigned char* nsec, + const unsigned char* npub, + const unsigned char* k) { + + unsigned char s[16]; + unsigned char CNT[8]; + const unsigned char* A; + const unsigned char* M; + const unsigned char* N; + + skinny_ctrl ctrl; + ctrl.func_skinny_128_384_enc = skinny_128_384_enc123_12; + + (void) nsec; + A = ad; + M = m; + N = npub; + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) = 0; + *(uint64_t*)(&s[8]) = 0; + +#else + + *(uint32_t*)(&s[0]) = 0; + *(uint32_t*)(&s[4]) = 0; + *(uint32_t*)(&s[8]) = 0; + *(uint32_t*)(&s[12]) = 0; + +#endif + + reset_lfsr_gf56(CNT); + + if (adlen == 0) { // AD is an empty string + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,0x1a,&ctrl); + } + else while (adlen > 0) { + if (adlen < 16) { // The last block of AD is odd and incomplete + adlen = ad_encryption_ud16(&A,s,adlen,CNT); + nonce_encryption(N,CNT,s,k,0x1a,&ctrl); + } + else if (adlen == 16) { // The last block of AD is odd and complete + adlen = ad_encryption_eq16(&A,s,CNT); + nonce_encryption(N,CNT,s,k,0x18,&ctrl); + } + else if (adlen < 32) { // The last block of AD is even and incomplete + adlen = ad_encryption_ov16(&A,s,k,adlen,CNT,0x08,&ctrl); + nonce_encryption(N,CNT,s,k,0x1a,&ctrl); + } + else if (adlen == 32) { // The last block of AD is even and complete + adlen = ad_encryption_eqov32(&A,s,k,adlen,CNT,0x08,&ctrl); + nonce_encryption(N,CNT,s,k,0x18,&ctrl); + } + else { // A normal full pair of blocks of AD + adlen = ad_encryption_eqov32(&A,s,k,adlen,CNT,0x08,&ctrl); + } + } + + ctrl.func_skinny_128_384_enc = skinny_128_384_enc1_1; + + reset_lfsr_gf56(CNT); + + *clen = mlen + 16; + + if (mlen == 0) { // M is an empty string + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,0x15,&ctrl); + } + else while (mlen > 0) { + if (mlen < 16) { // The last block of M is incomplete + mlen = msg_encryption_ud16(&M,&c,N,CNT,s,k,0x15,mlen,&ctrl); + } + else if (mlen == 16) { // The last block of M is complete + mlen = msg_encryption_eqov16(&M,&c,N,CNT,s,k,0x14,mlen,&ctrl); + } + else { // A normal full message block + mlen = msg_encryption_eqov16(&M,&c,N,CNT,s,k,0x04,mlen,&ctrl); + } + } + + // Tag generation + generate_tag(&c,s,clen); + + return 0; + +} + +int crypto_aead_decrypt( + unsigned char *m,unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c,unsigned long long clen, + const unsigned char *ad,unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k) { + + unsigned char s[16]; + unsigned char T[16]; + unsigned char CNT[8]; + const unsigned char* A; + unsigned char* M; 
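+  // Illustrative note (not part of the reference code): the code below
+  // assumes clen >= 16 (the tag length) when it computes clen = clen - 16; a
+  // defensive variant would start with
+  //
+  //   if (clen < 16) { *mlen = 0; return -1; }
+  //
+  // Likewise, the final tag check is a byte-wise loop with an early return; a
+  // constant-time sketch under the same interface would be
+  //
+  //   unsigned char diff = 0;
+  //   for (int i = 0; i < 16; i++) diff |= (unsigned char)(T[i] ^ c[i]);
+  //   return diff ? -1 : 0;
+  //
+  // where c points at the received tag once the decryption loop has consumed
+  // the ciphertext.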
+ const unsigned char* N; + + skinny_ctrl ctrl; + ctrl.func_skinny_128_384_enc = skinny_128_384_enc123_12; + + (void) nsec; + A = ad; + M = m; + N = npub; + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) = 0; + *(uint64_t*)(&s[8]) = 0; + +#else + + *(uint32_t*)(&s[0]) = 0; + *(uint32_t*)(&s[4]) = 0; + *(uint32_t*)(&s[8]) = 0; + *(uint32_t*)(&s[12]) = 0; + +#endif + + reset_lfsr_gf56(CNT); + + if (adlen == 0) { // AD is an empty string + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,0x1a,&ctrl); + } + else while (adlen > 0) { + if (adlen < 16) { // The last block of AD is odd and incomplete + adlen = ad_encryption_ud16(&A,s,adlen,CNT); + nonce_encryption(N,CNT,s,k,0x1a,&ctrl); + } + else if (adlen == 16) { // The last block of AD is odd and complete + adlen = ad_encryption_eq16(&A,s,CNT); + nonce_encryption(N,CNT,s,k,0x18,&ctrl); + } + else if (adlen < 32) { // The last block of AD is even and incomplete + adlen = ad_encryption_ov16(&A,s,k,adlen,CNT,0x08,&ctrl); + nonce_encryption(N,CNT,s,k,0x1a,&ctrl); + } + else if (adlen == 32) { // The last block of AD is even and complete + adlen = ad_encryption_eqov32(&A,s,k,adlen,CNT,0x08,&ctrl); + nonce_encryption(N,CNT,s,k,0x18,&ctrl); + } + else { // A normal full pair of blocks of AD + adlen = ad_encryption_eqov32(&A,s,k,adlen,CNT,0x08,&ctrl); + } + } + + ctrl.func_skinny_128_384_enc = skinny_128_384_enc1_1; + + reset_lfsr_gf56(CNT); + + clen = clen -16; + *mlen = clen; + + if (clen == 0) { // C is an empty string + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,0x15,&ctrl); + } + else while (clen > 0) { + if (clen < 16) { // The last block of C is incomplete + clen = msg_decryption_ud16(&M,&c,N,CNT,s,k,0x15,clen,&ctrl); + } + else if (clen == 16) { // The last block of C is complete + clen = msg_decryption_eqov16(&M,&c,N,CNT,s,k,0x14,clen,&ctrl); + } + else { // A normal full message block + clen = msg_decryption_eqov16(&M,&c,N,CNT,s,k,0x04,clen,&ctrl); + } + } + + // Tag generation + g8A_for_Tag_Generation(s, T); + + for (int i = 0; i < 16; i++) { + if (T[i] != (*(c+i))) { + return -1; + } + } + + return 0; + +} diff --git a/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/skinny.h b/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/skinny.h new file mode 100644 index 0000000..c8e7b56 --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/skinny.h @@ -0,0 +1,106 @@ +#define ___SKINNY_LOOP +//#define ___NUM_OF_ROUNDS_56 +#if (defined(__riscv_xlen) && (__riscv_xlen == 64)) +#define ___ENABLE_DWORD_CAST +#endif + +#include + +typedef struct ___skinny_ctrl { +#ifdef ___NUM_OF_ROUNDS_56 + unsigned char roundKeys[960]; // number of rounds : 56 +#else + unsigned char roundKeys[704]; // number of rounds : 40 +#endif + void (*func_skinny_128_384_enc)(unsigned char*, struct ___skinny_ctrl*, unsigned char* CNT, unsigned char* T, const unsigned char* K); +} skinny_ctrl; + +extern void skinny_128_384_enc123_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K); +extern void skinny_128_384_enc12_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K); +extern void skinny_128_384_enc1_1 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K); + +#define pack_word(x0, x1, x2, x3, w) \ + w = ((x3) << 24) ^ \ + ((x2) << 16) ^ \ + ((x1) << 8) ^ \ + (x0); + +#define unpack_word(x0, x1, x2, x3, w) \ + x0 = ((w) & 0xff); \ + x1 = (((w) >> 8) & 0xff); 
\ + x2 = (((w) >> 16) & 0xff); \ + x3 = ((w) >> 24); + +#ifdef ___ENABLE_DWORD_CAST + +#define PERMUTATION() \ +/* permutation */ \ + \ + /* 7 6 5 4 3 2 1 0 */ \ + /* 5 7 2 3 6 0 4 1 */ \ + \ + /* dw (7 6 5 4 3 2 1 0) */ \ + \ + /* dw (5 7 2 3 6 0 4 1) */ \ + \ + dt0 = dw >> 24; /* - - - 7 6 5 4 3 */ \ + dt0 = dt0 & 0x00000000ff00ff00; /* - - - - 6 - 4 - */ \ + \ + dt1 = dw << 16; /* 5 4 3 2 1 0 - - */ \ + dt1 = dt1 & 0xff00000000ff0000; /* 5 - - - - 0 - - */ \ + dt0 = dt0 ^ dt1; /* 5 - - - 6 0 4 - */ \ + \ + dt1 = dw >> 8; /* - 7 6 5 4 3 2 1 */ \ + dt1 = dt1 & 0x00ff0000000000ff; /* - 7 - - - - - 1 */ \ + dt0 = dt0 ^ dt1; /* 5 7 - - 6 0 4 1 */ \ + \ + dt1 = dw << 8; /* 6 5 4 3 2 1 0 - */ \ + dt1 = dt1 & 0x000000ff00000000; /* - - - 3 - - - - */ \ + dt0 = dt0 ^ dt1; /* 5 7 - 3 6 0 4 1 */ \ + \ + dt1 = dw << 24; /* 4 3 2 1 0 - - - */ \ + dw = dt1 & 0x0000ff0000000000; /* - - 2 - - - - - */ \ + dw = dw ^ dt0; /* 5 7 2 3 6 0 4 1 */ + +#else + +#define PERMUTATION() \ +/* permutation */ \ + \ + /* 7 6 5 4 3 2 1 0 */ \ + /* 5 7 2 3 6 0 4 1 */ \ + \ + /* w0 (3 2 1 0) */ \ + /* w1 (7 6 5 4) */ \ + \ + /* w0 (6 0 4 1) */ \ + /* w1 (5 7 2 3) */ \ + \ + t0 = w1 << 8; /* 6 5 4 - */ \ + t0 = t0 & 0xff00ff00; /* 6 - 4 - */ \ + \ + t1 = w1 << 16; /* 5 4 - - */ \ + t1 = t1 & 0xff000000; /* 5 - - - */ \ + \ + t2 = w1 & 0xff000000; /* 7 - - - */ \ + t2 = t2 >> 8; /* - 7 - - */ \ + t1 = t1 ^ t2; /* 5 7 - - */ \ + \ + t2 = w0 & 0xff000000; /* 3 - - - */ \ + t2 = t2 >> 24; /* - - - 3 */ \ + t1 = t1 ^ t2; /* 5 7 - 3 */ \ + \ + w1 = w0 >> 8; /* - 3 2 1 */ \ + w1 = w1 & 0x0000ff00; /* - - 2 - */ \ + w1 = w1 ^ t1; /* 5 7 2 3 */ \ + \ + t2 = w0 & 0x0000ff00; /* - - 1 - */ \ + t2 = t2 >> 8; /* - - - 1 */ \ + t0 = t0 ^ t2; /* 6 - 4 1 */ \ + \ + w0 = w0 << 16; /* 1 0 - - */ \ + w0 = w0 & 0x00ff0000; /* - 0 - - */ \ + w0 = w0 ^ t0; /* 6 0 4 1 */ + +#endif + diff --git a/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/skinny_key_schedule2.c b/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/skinny_key_schedule2.c new file mode 100644 index 0000000..c2f30de --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/skinny_key_schedule2.c @@ -0,0 +1,431 @@ +/****************************************************************************** + * Copyright (c) 2020, NEC Corporation. + * + * THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND. 
+ * + *****************************************************************************/ + +/* + * SKINNY-128-384 + * + * load * AC(c0 c1) ^ TK3 + * calc AC(c0 c1) ^ TK2 -> store + * ART(TK2) + * + * number of rounds : 40 or 56 + */ + +#include "skinny.h" + +#ifdef ___ENABLE_DWORD_CAST + +#define PERMUTATION_TK2() \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK2) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x6 x5 x4 x3 x2 x1 x0 x7^x5) */ \ + dw = ((dw << 1) & 0xfefefefefefefefe) ^ \ + (((dw >> 7) ^ (dw >> 5)) & 0x0101010101010101); \ + \ + /* Load TK3 */ \ + /* TK2^TK3^AC(c0 c1) */ \ + /* store */ \ + *tk2 = dw ^ *tk3; \ + tk2 += 2; \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys) +{ + uint64_t* tk2; // used in MACRO + uint64_t* tk3; // used in MACRO + uint64_t dt0; // used in MACRO + uint64_t dt1; // used in MACRO + uint64_t dw; + + // odd + + // load master key + // load master key + dw = *(uint64_t*)&roundKeys[16]; + + tk2 = (uint64_t*)&roundKeys[64]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[384]; +#else + tk3 = (uint64_t*)&roundKeys[512]; +#endif + + // 1st round + *tk2 = dw ^ *tk3; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... ,37th,39th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41th,43th, ... ,51th,53th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + + // even + + // load master key + dw = *(uint64_t*)&roundKeys[24]; + + tk2 = (uint64_t*)&roundKeys[72]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[392]; +#else + tk3 = (uint64_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... ,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... ,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys) +{ + uint64_t* tk2; // used in MACRO + uint64_t* tk3; // used in MACRO + uint64_t dt0; // used in MACRO + uint64_t dt1; // used in MACRO + uint64_t dw; + + // odd + + // load master key + dw = *(uint64_t*)&roundKeys[16]; + + tk2 = (uint64_t*)&roundKeys[64]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[384]; +#else + tk3 = (uint64_t*)&roundKeys[512]; +#endif + + // 1st round + *tk2 = dw ^ *tk3; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... 
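+  // Illustrative note (not part of the reference code): PERMUTATION_TK2()
+  // applies the byte permutation (7 6 5 4 3 2 1 0) -> (5 7 2 3 6 0 4 1) and
+  // then the TK2 LFSR (x7 x6 x5 x4 x3 x2 x1 x0) -> (x6 x5 x4 x3 x2 x1 x0
+  // x7^x5) to every byte; for example the byte 0x20 becomes 0x41.  Each
+  // iteration therefore produces the round key two rounds further on, which
+  // is why this "odd" pass and the "even" pass below (starting at
+  // roundKeys[72]) each fill every second round-key slot.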
+#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + PERMUTATION_TK2(); + } + + // even + + // load master key + dw = *(uint64_t*)&roundKeys[24]; + + tk2 = (uint64_t*)&roundKeys[72]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[392]; +#else + tk3 = (uint64_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + PERMUTATION_TK2(); + } + +} + +#endif /* ___SKINNY_LOOP */ + +#else /* ___ENABLE_DWORD_CAST */ + +#define PERMUTATION_TK2() \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK2) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x6 x5 x4 x3 x2 x1 x0 x7^x5) */ \ + w0 = ((w0 << 1) & 0xfefefefe) ^ \ + (((w0 >> 7) ^ (w0 >> 5)) & 0x01010101); \ + w1 = ((w1 << 1) & 0xfefefefe) ^ \ + (((w1 >> 7) ^ (w1 >> 5)) & 0x01010101); \ + \ + /* Load TK3 */ \ + /* TK2^TK3^AC(c0 c1) */ \ + /* store */ \ + *tk2++ = w0 ^ *tk3++; \ + *tk2++ = w1 ^ *tk3++; \ + tk2 += 2; \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys) +{ + uint32_t* tk2; // used in MACRO + uint32_t* tk3; // used in MACRO + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + + // odd + + // load master key + w0 = *(uint32_t*)&roundKeys[16]; + w1 = *(uint32_t*)&roundKeys[20]; + + tk2 = (uint32_t*)&roundKeys[64]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[384]; +#else + tk3 = (uint32_t*)&roundKeys[512]; +#endif + + // 1st round + *tk2++ = w0 ^ *tk3++; + *tk2++ = w1 ^ *tk3++; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... ,37th,39th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41th,43th, ... ,51th,53th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + + // even + + // load master key + w0 = *(uint32_t*)&roundKeys[24]; + w1 = *(uint32_t*)&roundKeys[28]; + + tk2 = (uint32_t*)&roundKeys[72]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[392]; +#else + tk3 = (uint32_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... ,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... 
,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys) +{ + uint32_t* tk2; // used in MACRO + uint32_t* tk3; // used in MACRO + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + + // odd + + // load master key + w0 = *(uint32_t*)&roundKeys[16]; + w1 = *(uint32_t*)&roundKeys[20]; + + tk2 = (uint32_t*)&roundKeys[64]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[384]; +#else + tk3 = (uint32_t*)&roundKeys[512]; +#endif + + // 1st round + *tk2++ = w0 ^ *tk3++; + *tk2++ = w1 ^ *tk3++; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + PERMUTATION_TK2(); + } + + // even + + // load master key + w0 = *(uint32_t*)&roundKeys[24]; + w1 = *(uint32_t*)&roundKeys[28]; + + tk2 = (uint32_t*)&roundKeys[72]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[392]; +#else + tk3 = (uint32_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + PERMUTATION_TK2(); + } + +} + +#endif /* ___SKINNY_LOOP */ + +#endif /* ___ENABLE_DWORD_CAST */ + diff --git a/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/skinny_key_schedule3.c b/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/skinny_key_schedule3.c new file mode 100644 index 0000000..5dcaf7f --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/skinny_key_schedule3.c @@ -0,0 +1,428 @@ +/****************************************************************************** + * Copyright (c) 2020, NEC Corporation. + * + * THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND. + * + *****************************************************************************/ + +/* + * SKINNY-128-384 + * + * AC(c0 c1) ^ TK3 -> store + * ART(TK3) + * + * number of rounds : 40 or 56 + */ + +#include "skinny.h" + +#ifdef ___ENABLE_DWORD_CAST + +#define PERMUTATION_TK3(c0Val, c1Val) \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK3) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x0^x6 x7 x6 x5 x4 x3 x2 x1) */ \ + dw = ((dw >> 1) & 0x7f7f7f7f7f7f7f7f) ^ \ + (((dw << 7) ^ (dw << 1)) & 0x8080808080808080); \ + \ + /* K3^AC(c0 c1) */ \ + /* store */ \ + dt0 = dw ^ c0Val; \ + *tk3 = dt0 ^ ((uint64_t)c1Val << 40); \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys) +{ + uint64_t *tk3; + uint64_t dt0; // used in MACRO + uint64_t dt1; // used in MACRO + uint64_t dw; + + // odd + + // load master key + dw = *(uint64_t*)&roundKeys[32]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[384]; +#else + tk3 = (uint64_t*)&roundKeys[512]; +#endif + + // 1st round + *tk3++ = dw ^ 0x01; + tk3 += 1; + + // 3rd,5th, ... 
,37th,39th round + PERMUTATION_TK3(0x7, 0x0); + PERMUTATION_TK3(0xf, 0x1); + PERMUTATION_TK3(0xd, 0x3); + PERMUTATION_TK3(0x7, 0x3); + PERMUTATION_TK3(0xe, 0x1); + PERMUTATION_TK3(0x9, 0x3); + PERMUTATION_TK3(0x7, 0x2); + PERMUTATION_TK3(0xd, 0x1); + PERMUTATION_TK3(0x5, 0x3); + + PERMUTATION_TK3(0x6, 0x1); + PERMUTATION_TK3(0x8, 0x1); + PERMUTATION_TK3(0x1, 0x2); + PERMUTATION_TK3(0x5, 0x0); + PERMUTATION_TK3(0x7, 0x1); + PERMUTATION_TK3(0xc, 0x1); + PERMUTATION_TK3(0x1, 0x3); + PERMUTATION_TK3(0x6, 0x0); + PERMUTATION_TK3(0xb, 0x1); + PERMUTATION_TK3(0xd, 0x2); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41td,43th, ... ,53th,55th round + PERMUTATION_TK3(0x4, 0x3); + PERMUTATION_TK3(0x2, 0x1); + PERMUTATION_TK3(0x8, 0x0); + PERMUTATION_TK3(0x2, 0x2); + PERMUTATION_TK3(0x9, 0x0); + PERMUTATION_TK3(0x6, 0x2); + PERMUTATION_TK3(0x9, 0x1); + PERMUTATION_TK3(0x5, 0x2); + +#endif + + // even + + // load master key + dw = *(uint64_t*)&roundKeys[40]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[392]; +#else + tk3 = (uint64_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... ,38th,40th round + PERMUTATION_TK3(0x3, 0x0); + PERMUTATION_TK3(0xf, 0x0); + PERMUTATION_TK3(0xe, 0x3); + PERMUTATION_TK3(0xb, 0x3); + PERMUTATION_TK3(0xf, 0x2); + PERMUTATION_TK3(0xc, 0x3); + PERMUTATION_TK3(0x3, 0x3); + PERMUTATION_TK3(0xe, 0x0); + PERMUTATION_TK3(0xa, 0x3); + PERMUTATION_TK3(0xb, 0x2); + + PERMUTATION_TK3(0xc, 0x2); + PERMUTATION_TK3(0x0, 0x3); + PERMUTATION_TK3(0x2, 0x0); + PERMUTATION_TK3(0xb, 0x0); + PERMUTATION_TK3(0xe, 0x2); + PERMUTATION_TK3(0x8, 0x3); + PERMUTATION_TK3(0x3, 0x2); + PERMUTATION_TK3(0xd, 0x0); + PERMUTATION_TK3(0x6, 0x3); + PERMUTATION_TK3(0xa, 0x1); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... ,54th,56th round + PERMUTATION_TK3(0x9, 0x2); + PERMUTATION_TK3(0x4, 0x2); + PERMUTATION_TK3(0x1, 0x1); + PERMUTATION_TK3(0x4, 0x0); + PERMUTATION_TK3(0x3, 0x1); + PERMUTATION_TK3(0xc, 0x0); + PERMUTATION_TK3(0x2, 0x3); + PERMUTATION_TK3(0xa, 0x0); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys, unsigned char *pRC) +{ + uint64_t *tk3; + uint64_t dt0; // used in MACRO + uint64_t dt1; // used in MACRO + uint64_t dw; + uint64_t c0; + uint64_t c1; + + // odd + + // load master key + dw = *(uint64_t*)&roundKeys[32]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[384]; +#else + tk3 = (uint64_t*)&roundKeys[512]; +#endif + + // 1st round + *tk3++ = dw ^ 0x01; + tk3 += 1; + + pRC += 4; + // 3rd,5th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + + // even + + // load master key + dw = *(uint64_t*)&roundKeys[40]; + +#ifndef ___NUM_OF_ROUNDS_56 + pRC -= 78; + tk3 = (uint64_t*)&roundKeys[392]; +#else + pRC -= 110; + tk3 = (uint64_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... 
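+  // Illustrative note (not part of the reference code): RC[] holds the round
+  // constants as two bytes (c0, c1) per round.  The odd pass above consumed
+  // 4 + 19*4 = 80 bytes (4 + 27*4 = 112 with 56 rounds), so rewinding pRC by
+  // 78 (resp. 110) lands on offset 2, the constants of round 2; the loop
+  // below then again skips the intervening odd round's pair via pRC += 2.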
+#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + +} + +#endif /* ___SKINNY_LOOP */ + +#else /* ___ENABLE_DWORD_CAST */ + +#define PERMUTATION_TK3(c0Val, c1Val) \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK3) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x0^x6 x7 x6 x5 x4 x3 x2 x1) */ \ + w0 = ((w0 >> 1) & 0x7f7f7f7f) ^ \ + (((w0 << 7) ^ (w0 << 1)) & 0x80808080); \ + w1 = ((w1 >> 1) & 0x7f7f7f7f) ^ \ + (((w1 << 7) ^ (w1 << 1)) & 0x80808080); \ + \ + /* K3^AC(c0 c1) */ \ + /* store */ \ + *tk3++ = w0 ^ c0Val; \ + *tk3++ = w1 ^ ((uint32_t)c1Val << 8); \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys) +{ + uint32_t *tk3; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + + // odd + + // load master key + w0 = *(uint32_t*)&roundKeys[32]; + w1 = *(uint32_t*)&roundKeys[36]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[384]; +#else + tk3 = (uint32_t*)&roundKeys[512]; +#endif + + // 1st round + *tk3++ = w0 ^ 0x01; + *tk3++ = w1; + tk3 += 2; + + // 3rd,5th, ... ,37th,39th round + PERMUTATION_TK3(0x7, 0x0); + PERMUTATION_TK3(0xf, 0x1); + PERMUTATION_TK3(0xd, 0x3); + PERMUTATION_TK3(0x7, 0x3); + PERMUTATION_TK3(0xe, 0x1); + PERMUTATION_TK3(0x9, 0x3); + PERMUTATION_TK3(0x7, 0x2); + PERMUTATION_TK3(0xd, 0x1); + PERMUTATION_TK3(0x5, 0x3); + + PERMUTATION_TK3(0x6, 0x1); + PERMUTATION_TK3(0x8, 0x1); + PERMUTATION_TK3(0x1, 0x2); + PERMUTATION_TK3(0x5, 0x0); + PERMUTATION_TK3(0x7, 0x1); + PERMUTATION_TK3(0xc, 0x1); + PERMUTATION_TK3(0x1, 0x3); + PERMUTATION_TK3(0x6, 0x0); + PERMUTATION_TK3(0xb, 0x1); + PERMUTATION_TK3(0xd, 0x2); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41td,43th, ... ,53th,55th round + PERMUTATION_TK3(0x4, 0x3); + PERMUTATION_TK3(0x2, 0x1); + PERMUTATION_TK3(0x8, 0x0); + PERMUTATION_TK3(0x2, 0x2); + PERMUTATION_TK3(0x9, 0x0); + PERMUTATION_TK3(0x6, 0x2); + PERMUTATION_TK3(0x9, 0x1); + PERMUTATION_TK3(0x5, 0x2); + +#endif + + // even + + // load master key + w0 = *(uint32_t*)&roundKeys[40]; + w1 = *(uint32_t*)&roundKeys[44]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[392]; +#else + tk3 = (uint32_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... ,38th,40th round + PERMUTATION_TK3(0x3, 0x0); + PERMUTATION_TK3(0xf, 0x0); + PERMUTATION_TK3(0xe, 0x3); + PERMUTATION_TK3(0xb, 0x3); + PERMUTATION_TK3(0xf, 0x2); + PERMUTATION_TK3(0xc, 0x3); + PERMUTATION_TK3(0x3, 0x3); + PERMUTATION_TK3(0xe, 0x0); + PERMUTATION_TK3(0xa, 0x3); + PERMUTATION_TK3(0xb, 0x2); + + PERMUTATION_TK3(0xc, 0x2); + PERMUTATION_TK3(0x0, 0x3); + PERMUTATION_TK3(0x2, 0x0); + PERMUTATION_TK3(0xb, 0x0); + PERMUTATION_TK3(0xe, 0x2); + PERMUTATION_TK3(0x8, 0x3); + PERMUTATION_TK3(0x3, 0x2); + PERMUTATION_TK3(0xd, 0x0); + PERMUTATION_TK3(0x6, 0x3); + PERMUTATION_TK3(0xa, 0x1); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... 
,54th,56th round + PERMUTATION_TK3(0x9, 0x2); + PERMUTATION_TK3(0x4, 0x2); + PERMUTATION_TK3(0x1, 0x1); + PERMUTATION_TK3(0x4, 0x0); + PERMUTATION_TK3(0x3, 0x1); + PERMUTATION_TK3(0xc, 0x0); + PERMUTATION_TK3(0x2, 0x3); + PERMUTATION_TK3(0xa, 0x0); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys, unsigned char *pRC) +{ + uint32_t *tk3; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + uint32_t c0; + uint32_t c1; + + // odd + + // load master key + w0 = *(uint32_t*)&roundKeys[32]; + w1 = *(uint32_t*)&roundKeys[36]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[384]; +#else + tk3 = (uint32_t*)&roundKeys[512]; +#endif + + // 1st round + *tk3++ = w0 ^ 0x01; + *tk3++ = w1; + tk3 += 2; + + pRC += 4; + // 3rd,5th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + + // even + + // load master key + w0 = *(uint32_t*)&roundKeys[40]; + w1 = *(uint32_t*)&roundKeys[44]; + +#ifndef ___NUM_OF_ROUNDS_56 + pRC -= 78; + tk3 = (uint32_t*)&roundKeys[392]; +#else + pRC -= 110; + tk3 = (uint32_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + +} + +#endif /* ___SKINNY_LOOP */ + +#endif /* ___ENABLE_DWORD_CAST */ + diff --git a/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/skinny_main.c b/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/skinny_main.c new file mode 100644 index 0000000..8a6e75f --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusn1+/opt32a_NEC/skinny_main.c @@ -0,0 +1,675 @@ +/****************************************************************************** + * Copyright (c) 2020, NEC Corporation. + * + * THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND. 
+ * + *****************************************************************************/ + +/* + * SKINNY-128-384 + * + * ART(TK1) -> store + * load AC(c0 c1) ^ TK3 ^ TK2 + * load TK1 + * calc AC(c0 c1) ^ TK3 ^ TK2 ^ TK1 -> use at (AC->ART) + * SC->SR->(AC->ART)->MC + * + * number of rounds : 40 or 56 + */ + +#include "skinny.h" + +/* + * S-BOX + */ +unsigned char SBOX[] += { + // Original + 0x65, 0x4c, 0x6a, 0x42, 0x4b, 0x63, 0x43, 0x6b, 0x55, 0x75, 0x5a, 0x7a, 0x53, 0x73, 0x5b, 0x7b, + 0x35, 0x8c, 0x3a, 0x81, 0x89, 0x33, 0x80, 0x3b, 0x95, 0x25, 0x98, 0x2a, 0x90, 0x23, 0x99, 0x2b, + 0xe5, 0xcc, 0xe8, 0xc1, 0xc9, 0xe0, 0xc0, 0xe9, 0xd5, 0xf5, 0xd8, 0xf8, 0xd0, 0xf0, 0xd9, 0xf9, + 0xa5, 0x1c, 0xa8, 0x12, 0x1b, 0xa0, 0x13, 0xa9, 0x05, 0xb5, 0x0a, 0xb8, 0x03, 0xb0, 0x0b, 0xb9, + 0x32, 0x88, 0x3c, 0x85, 0x8d, 0x34, 0x84, 0x3d, 0x91, 0x22, 0x9c, 0x2c, 0x94, 0x24, 0x9d, 0x2d, + 0x62, 0x4a, 0x6c, 0x45, 0x4d, 0x64, 0x44, 0x6d, 0x52, 0x72, 0x5c, 0x7c, 0x54, 0x74, 0x5d, 0x7d, + 0xa1, 0x1a, 0xac, 0x15, 0x1d, 0xa4, 0x14, 0xad, 0x02, 0xb1, 0x0c, 0xbc, 0x04, 0xb4, 0x0d, 0xbd, + 0xe1, 0xc8, 0xec, 0xc5, 0xcd, 0xe4, 0xc4, 0xed, 0xd1, 0xf1, 0xdc, 0xfc, 0xd4, 0xf4, 0xdd, 0xfd, + 0x36, 0x8e, 0x38, 0x82, 0x8b, 0x30, 0x83, 0x39, 0x96, 0x26, 0x9a, 0x28, 0x93, 0x20, 0x9b, 0x29, + 0x66, 0x4e, 0x68, 0x41, 0x49, 0x60, 0x40, 0x69, 0x56, 0x76, 0x58, 0x78, 0x50, 0x70, 0x59, 0x79, + 0xa6, 0x1e, 0xaa, 0x11, 0x19, 0xa3, 0x10, 0xab, 0x06, 0xb6, 0x08, 0xba, 0x00, 0xb3, 0x09, 0xbb, + 0xe6, 0xce, 0xea, 0xc2, 0xcb, 0xe3, 0xc3, 0xeb, 0xd6, 0xf6, 0xda, 0xfa, 0xd3, 0xf3, 0xdb, 0xfb, + 0x31, 0x8a, 0x3e, 0x86, 0x8f, 0x37, 0x87, 0x3f, 0x92, 0x21, 0x9e, 0x2e, 0x97, 0x27, 0x9f, 0x2f, + 0x61, 0x48, 0x6e, 0x46, 0x4f, 0x67, 0x47, 0x6f, 0x51, 0x71, 0x5e, 0x7e, 0x57, 0x77, 0x5f, 0x7f, + 0xa2, 0x18, 0xae, 0x16, 0x1f, 0xa7, 0x17, 0xaf, 0x01, 0xb2, 0x0e, 0xbe, 0x07, 0xb7, 0x0f, 0xbf, + 0xe2, 0xca, 0xee, 0xc6, 0xcf, 0xe7, 0xc7, 0xef, 0xd2, 0xf2, 0xde, 0xfe, 0xd7, 0xf7, 0xdf, 0xff, +}; + + /* + * S-BOX ^ AC(c2) + */ +unsigned char SBOX2[] += { // Original ^ c2(0x02) + 0x67, 0x4e, 0x68, 0x40, 0x49, 0x61, 0x41, 0x69, 0x57, 0x77, 0x58, 0x78, 0x51, 0x71, 0x59, 0x79, + 0x37, 0x8e, 0x38, 0x83, 0x8b, 0x31, 0x82, 0x39, 0x97, 0x27, 0x9a, 0x28, 0x92, 0x21, 0x9b, 0x29, + 0xe7, 0xce, 0xea, 0xc3, 0xcb, 0xe2, 0xc2, 0xeb, 0xd7, 0xf7, 0xda, 0xfa, 0xd2, 0xf2, 0xdb, 0xfb, + 0xa7, 0x1e, 0xaa, 0x10, 0x19, 0xa2, 0x11, 0xab, 0x07, 0xb7, 0x08, 0xba, 0x01, 0xb2, 0x09, 0xbb, + 0x30, 0x8a, 0x3e, 0x87, 0x8f, 0x36, 0x86, 0x3f, 0x93, 0x20, 0x9e, 0x2e, 0x96, 0x26, 0x9f, 0x2f, + 0x60, 0x48, 0x6e, 0x47, 0x4f, 0x66, 0x46, 0x6f, 0x50, 0x70, 0x5e, 0x7e, 0x56, 0x76, 0x5f, 0x7f, + 0xa3, 0x18, 0xae, 0x17, 0x1f, 0xa6, 0x16, 0xaf, 0x00, 0xb3, 0x0e, 0xbe, 0x06, 0xb6, 0x0f, 0xbf, + 0xe3, 0xca, 0xee, 0xc7, 0xcf, 0xe6, 0xc6, 0xef, 0xd3, 0xf3, 0xde, 0xfe, 0xd6, 0xf6, 0xdf, 0xff, + 0x34, 0x8c, 0x3a, 0x80, 0x89, 0x32, 0x81, 0x3b, 0x94, 0x24, 0x98, 0x2a, 0x91, 0x22, 0x99, 0x2b, + 0x64, 0x4c, 0x6a, 0x43, 0x4b, 0x62, 0x42, 0x6b, 0x54, 0x74, 0x5a, 0x7a, 0x52, 0x72, 0x5b, 0x7b, + 0xa4, 0x1c, 0xa8, 0x13, 0x1b, 0xa1, 0x12, 0xa9, 0x04, 0xb4, 0x0a, 0xb8, 0x02, 0xb1, 0x0b, 0xb9, + 0xe4, 0xcc, 0xe8, 0xc0, 0xc9, 0xe1, 0xc1, 0xe9, 0xd4, 0xf4, 0xd8, 0xf8, 0xd1, 0xf1, 0xd9, 0xf9, + 0x33, 0x88, 0x3c, 0x84, 0x8d, 0x35, 0x85, 0x3d, 0x90, 0x23, 0x9c, 0x2c, 0x95, 0x25, 0x9d, 0x2d, + 0x63, 0x4a, 0x6c, 0x44, 0x4d, 0x65, 0x45, 0x6d, 0x53, 0x73, 0x5c, 0x7c, 0x55, 0x75, 0x5d, 0x7d, + 0xa0, 0x1a, 0xac, 0x14, 0x1d, 0xa5, 0x15, 0xad, 0x03, 0xb0, 0x0c, 0xbc, 0x05, 0xb5, 0x0d, 0xbd, + 0xe0, 0xc8, 0xec, 0xc4, 0xcd, 0xe5, 
0xc5, 0xed, 0xd0, 0xf0, 0xdc, 0xfc, 0xd5, 0xf5, 0xdd, 0xfd, +}; + +#ifdef ___SKINNY_LOOP +/* + * Round Constants + */ +unsigned char RC[] += { + 0x01, 0x00, 0x03, 0x00, 0x07, 0x00, 0x0f, 0x00, 0x0f, 0x01, 0x0e, 0x03, 0x0d, 0x03, 0x0b, 0x03, + 0x07, 0x03, 0x0f, 0x02, 0x0e, 0x01, 0x0c, 0x03, 0x09, 0x03, 0x03, 0x03, 0x07, 0x02, 0x0e, 0x00, + 0x0d, 0x01, 0x0a, 0x03, 0x05, 0x03, 0x0b, 0x02, 0x06, 0x01, 0x0c, 0x02, 0x08, 0x01, 0x00, 0x03, + 0x01, 0x02, 0x02, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x07, 0x01, 0x0e, 0x02, 0x0c, 0x01, 0x08, 0x03, + 0x01, 0x03, 0x03, 0x02, 0x06, 0x00, 0x0d, 0x00, 0x0b, 0x01, 0x06, 0x03, 0x0d, 0x02, 0x0a, 0x01, +#ifdef ___NUM_OF_ROUNDS_56 + 0x04, 0x03, 0x09, 0x02, 0x02, 0x01, 0x04, 0x02, 0x08, 0x00, 0x01, 0x01, 0x02, 0x02, 0x04, 0x00, + 0x09, 0x00, 0x03, 0x01, 0x06, 0x02, 0x0c, 0x00, 0x09, 0x01, 0x02, 0x03, 0x05, 0x02, 0x0a, 0x00, +#endif + }; +#endif + +extern void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2); +extern void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys); +#ifdef ___SKINNY_LOOP +extern void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys, unsigned char *pRC); +#else +extern void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys); +#endif + +void skinny_128_384_enc123_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K) +{ + uint32_t *pt = (uint32_t*)&pskinny_ctrl->roundKeys[0]; + + pt[0] = *(uint32_t*)(&CNT[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + pt[4] = *(uint32_t*)(&T[0]); + pack_word(T[7], T[4], T[5], T[6], pt[5]); + pt[6] = *(uint32_t*)(&T[8]); + pack_word(T[15], T[12], T[13], T[14], pt[7]); + + pt[8] = *(uint32_t*)(&K[0]); + pack_word(K[7], K[4], K[5], K[6], pt[9]); + pt[10] = *(uint32_t*)(&K[8]); + pack_word(K[15], K[12], K[13], K[14], pt[11]); + +#ifdef ___SKINNY_LOOP + RunEncryptionKeyScheduleTK3(pskinny_ctrl->roundKeys, RC); +#else + RunEncryptionKeyScheduleTK3(pskinny_ctrl->roundKeys); +#endif + RunEncryptionKeyScheduleTK2(pskinny_ctrl->roundKeys); + Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2); + + pskinny_ctrl->func_skinny_128_384_enc = skinny_128_384_enc12_12; + +} + +void skinny_128_384_enc12_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K) +{ + (void)K; + + uint32_t *pt = &pskinny_ctrl->roundKeys[0]; + + pt[0] = *(uint32_t*)(&CNT[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + pt[4] = *(uint32_t*)(&T[0]); + pack_word(T[7], T[4], T[5], T[6], pt[5]); + pt[6] = *(uint32_t*)(&T[8]); + pack_word(T[15], T[12], T[13], T[14], pt[7]); + + RunEncryptionKeyScheduleTK2(pskinny_ctrl->roundKeys); + Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2); + +} + +extern void skinny_128_384_enc1_1 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K) +{ + (void)T; + (void)K; + + uint32_t *pt = &pskinny_ctrl->roundKeys[0]; + + pt[0] = *(uint32_t*)(&CNT[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2); + +} + +#define PERMUTATION_TK1() \ + \ +/* permutation */ \ +{ \ + unsigned char tmp0 = roundKeys[0]; \ + unsigned char tmp1 = roundKeys[1]; \ + unsigned char tmp2 = roundKeys[2]; \ + unsigned char tmp3 = roundKeys[3]; \ + unsigned char tmp4 = roundKeys[4]; \ + unsigned char tmp5 = roundKeys[5]; \ + unsigned char tmp6 = roundKeys[6]; \ + unsigned char tmp7 = roundKeys[7]; \ + \ + unsigned char* dst = 
&roundKeys[8]; \ + \ + /* 5 7 2 3 6 0 4 1 */ \ + *dst++ = tmp1; \ + *dst++ = tmp4; \ + *dst++ = tmp0; \ + *dst++ = tmp6; \ + *dst++ = tmp3; \ + *dst++ = tmp2; \ + *dst++ = tmp7; \ + *dst++ = tmp5; \ + \ + /* 2 5 0 6 7 1 3 4 */ \ + *dst++ = tmp4; \ + *dst++ = tmp3; \ + *dst++ = tmp1; \ + *dst++ = tmp7; \ + *dst++ = tmp6; \ + *dst++ = tmp0; \ + *dst++ = tmp5; \ + *dst++ = tmp2; \ + \ + /* 0 2 1 7 5 4 6 3 */ \ + *dst++ = tmp3; \ + *dst++ = tmp6; \ + *dst++ = tmp4; \ + *dst++ = tmp5; \ + *dst++ = tmp7; \ + *dst++ = tmp1; \ + *dst++ = tmp2; \ + *dst++ = tmp0; \ + \ + /* 1 0 4 5 2 3 7 6 */ \ + *dst++ = tmp6; \ + *dst++ = tmp7; \ + *dst++ = tmp3; \ + *dst++ = tmp2; \ + *dst++ = tmp5; \ + *dst++ = tmp4; \ + *dst++ = tmp0; \ + *dst++ = tmp1; \ + \ + /* 4 1 3 2 0 6 5 7 */ \ + *dst++ = tmp7; \ + *dst++ = tmp5; \ + *dst++ = tmp6; \ + *dst++ = tmp0; \ + *dst++ = tmp2; \ + *dst++ = tmp3; \ + *dst++ = tmp1; \ + *dst++ = tmp4; \ + \ + /* 3 4 6 0 1 7 2 5 */ \ + *dst++ = tmp5; \ + *dst++ = tmp2; \ + *dst++ = tmp7; \ + *dst++ = tmp1; \ + *dst++ = tmp0; \ + *dst++ = tmp6; \ + *dst++ = tmp4; \ + *dst++ = tmp3; \ + \ + /* 6 3 7 1 4 5 0 2 */ \ + *dst++ = tmp2; \ + *dst++ = tmp0; \ + *dst++ = tmp5; \ + *dst++ = tmp4; \ + *dst++ = tmp1; \ + *dst++ = tmp7; \ + *dst++ = tmp3; \ + *dst++ = tmp6; \ +} + +#define SBOX_0(b0, b1, b2, b3) \ + \ + t0 = sbox[b0]; \ + t1 = sbox[b1]; \ + t2 = sbox[b2]; \ + t3 = sbox[b3]; \ + \ + b0 = (uint8_t)t0; \ + b1 = (uint8_t)t1; \ + b2 = (uint8_t)t2; \ + b3 = (uint8_t)t3; + +#define SBOX_8(b0, b1, b2, b3) \ + \ + t0 = sbox[b0]; \ + t1 = sbox[b1]; \ + t2 = sbox[b2]; \ + t3 = sbox[b3]; \ + \ + b0 = (uint8_t)t3; \ + b1 = (uint8_t)t0; \ + b2 = (uint8_t)t1; \ + b3 = (uint8_t)t2; + +#define SBOX_16(b0, b1, b2, b3) \ + \ + t0 = sbox2[b0]; /* AC(c2) */ \ + t1 = sbox[b1]; \ + t2 = sbox[b2]; \ + t3 = sbox[b3]; \ + \ + b0 = (uint8_t)t2; \ + b1 = (uint8_t)t3; \ + b2 = (uint8_t)t0; \ + b3 = (uint8_t)t1; + +#define SBOX_24(b0, b1, b2, b3) \ + \ + t0 = sbox[b0]; \ + t1 = sbox[b1]; \ + t2 = sbox[b2]; \ + t3 = sbox[b3]; \ + \ + b0 = (uint8_t)t1; \ + b1 = (uint8_t)t2; \ + b2 = (uint8_t)t3; \ + b3 = (uint8_t)t0; + +#ifdef ___ENABLE_DWORD_CAST + +#define SKINNY_MAIN() \ +{ \ + \ + /* odd */ \ + \ + /* LUT(with ShiftRows & AC(c2))*/ \ + \ + SBOX_0( block[0], block[1], block[2], block[3]); \ + SBOX_8( block[4], block[5], block[6], block[7]); \ + SBOX_16(block[8], block[9], block[10], block[11]); \ + SBOX_24(block[12], block[13], block[14], block[15]); \ + \ + /* TK1^TK2^TK3^AC(c0 c1) */ \ + \ + t1 = *(uint64_t*)&block[0]; \ + t1 ^= *tk1++; \ + t1 ^= *tk2++; \ + \ + /* MC */ \ + \ + t2 = *(uint64_t*)&block[8]; \ + t0 = t2 >> 32; \ + \ + /* 0^2 */ \ + t3 = t1 ^ t2; \ + \ + /* 1^2 */ \ + t2 = (t1 >> 32) ^ t2; \ + \ + /* 0^2^3 */ \ + t0 = t0 ^ t3; \ + \ + *(uint32_t*)&block[0] = (uint32_t)t0; \ + *(uint32_t*)&block[4] = (uint32_t)t1; \ + *(uint32_t*)&block[8] = (uint32_t)t2; \ + *(uint32_t*)&block[12] = (uint32_t)t3; \ + \ + /* even */ \ + \ + /* LUT(with ShiftRows & AC(c2))*/ \ + \ + SBOX_0( block[0], block[1], block[2], block[3]); \ + SBOX_8( block[4], block[5], block[6], block[7]); \ + SBOX_16(block[8], block[9], block[10], block[11]); \ + SBOX_24(block[12], block[13], block[14], block[15]); \ + \ + /* TK2^TK3^AC(c0 c1) */ \ + \ + t1 = *(uint64_t*)&block[0]; \ + t1 ^= *tk2++; \ + \ + /* MC */ \ + \ + t2 = *(uint64_t*)&block[8]; \ + t0 = t2 >> 32; \ + \ + /* 0^2 */ \ + t3 = t1 ^ t2; \ + \ + /* 1^2 */ \ + t2 = (t1 >> 32) ^ t2; \ + \ + /* 0^2^3 */ \ + t0 = t0 ^ t3; \ + \ + *(uint32_t*)&block[0] = (uint32_t)t0; \ 
+ *(uint32_t*)&block[4] = (uint32_t)t1; \ + *(uint32_t*)&block[8] = (uint32_t)t2; \ + *(uint32_t*)&block[12] = (uint32_t)t3; \ +} + +#ifndef ___SKINNY_LOOP + +void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint64_t *tk1; + uint64_t *tk2; + uint64_t t0; // used in MACRO + uint64_t t1; // used in MACRO + uint64_t t2; // used in MACRO + uint64_t t3; // used in MACRO + +// TK1 + + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + + tk2 = (uint64_t*)&roundKeys[64]; + tk1 = (uint64_t*)&roundKeys[0]; + + // 1st, ...,16th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint64_t*)&roundKeys[0]; + + // 17th, ...,32th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint64_t*)&roundKeys[0]; + + // 33th, ...,40th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41th, ...,48th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint64_t*)&roundKeys[0]; + + // 49th, ... ,56th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint64_t *tk1; + uint64_t *tk2; + uint64_t t0; // used in MACRO + uint64_t t1; // used in MACRO + uint64_t t2; // used in MACRO + uint64_t t3; // used in MACRO + +// TK1 + + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + + tk2 = (uint64_t*)&roundKeys[64]; + + // 1st, ... ,32th or 48th round +#ifndef ___NUM_OF_ROUNDS_56 + for(int j=0;j<2;j++) +#else + for(int j=0;j<3;j++) +#endif + { + tk1 = (uint64_t*)&roundKeys[0]; + for(int i=0;i<8;i++) + { + SKINNY_MAIN(); + } + } + + // 33th , ... ,40th or 49th, .... 
,56th round + { + tk1 = (uint64_t*)&roundKeys[0]; + for(int i=0;i<4;i++) + { + SKINNY_MAIN(); + } + } +} + +#endif /* ___SKINNY_LOOP */ + +#else /* ___ENABLE_DWORD_CAST */ + +#define SKINNY_MAIN() \ +{ \ + \ + /* odd */ \ + \ + /* LUT(with ShiftRows & AC(c2))*/ \ + \ + SBOX_0( block[0], block[1], block[2], block[3]); \ + SBOX_8( block[4], block[5], block[6], block[7]); \ + SBOX_16(block[8], block[9], block[10], block[11]); \ + SBOX_24(block[12], block[13], block[14], block[15]); \ + \ + /* TK1^TK2^TK3^AC(c0 c1) */ \ + \ + t1 = *(uint32_t*)&block[0]; \ + t0 = *(uint32_t*)&block[4]; \ + t1 ^= *tk1++; \ + t1 ^= *tk2++; \ + t0 ^= *tk1++; \ + t0 ^= *tk2++; \ + \ + /* MC */ \ + \ + t2 = *(uint32_t*)&block[8]; \ + t4 = *(uint32_t*)&block[12]; \ + \ + /* 0^2 */ \ + t3 = t1 ^ t2; \ + \ + /* 1^2 */ \ + t2 = t0 ^ t2; \ + \ + /* 0^2^3 */ \ + t0 = t3 ^ t4; \ + \ + *(uint32_t*)&block[0] = t0; \ + *(uint32_t*)&block[4] = t1; \ + *(uint32_t*)&block[8] = t2; \ + *(uint32_t*)&block[12] = t3; \ + \ + /* even */ \ + \ + /* LUT(with ShiftRows & AC(c2))*/ \ + \ + SBOX_0( block[0], block[1], block[2], block[3]); \ + SBOX_8( block[4], block[5], block[6], block[7]); \ + SBOX_16(block[8], block[9], block[10], block[11]); \ + SBOX_24(block[12], block[13], block[14], block[15]); \ + \ + /* TK2^TK3^AC(c0 c1) */ \ + \ + t1 = *(uint32_t*)&block[0]; \ + t0 = *(uint32_t*)&block[4]; \ + t1 ^= *tk2++; \ + t0 ^= *tk2++; \ + \ + /* MC */ \ + \ + t2 = *(uint32_t*)&block[8]; \ + t4 = *(uint32_t*)&block[12]; \ + \ + /* 0^2 */ \ + t3 = t1 ^ t2; \ + \ + /* 1^2 */ \ + t2 = t0 ^ t2; \ + \ + /* 0^2^3 */ \ + t0 = t3 ^ t4; \ + \ + *(uint32_t*)&block[0] = t0; \ + *(uint32_t*)&block[4] = t1; \ + *(uint32_t*)&block[8] = t2; \ + *(uint32_t*)&block[12] = t3; \ +} + +#ifndef ___SKINNY_LOOP + +void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint32_t *tk1; + uint32_t *tk2; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t t3; // used in MACRO + uint32_t t4; // used in MACRO + +// TK1 + + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + + tk2 = (uint32_t*)&roundKeys[64]; + tk1 = (uint32_t*)&roundKeys[0]; + + // 1st, ...,16th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint32_t*)&roundKeys[0]; + + // 17th, ...,32th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint32_t*)&roundKeys[0]; + + // 33th, ...,40th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41th, ...,48th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint32_t*)&roundKeys[0]; + + // 49th, ... ,56th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint32_t *tk1; + uint32_t *tk2; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t t3; // used in MACRO + uint32_t t4; // used in MACRO + +// TK1 + + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + + tk2 = (uint32_t*)&roundKeys[64]; + + // 1st, ... 
,32th or 48th round +#ifndef ___NUM_OF_ROUNDS_56 + for(int j=0;j<2;j++) +#else + for(int j=0;j<3;j++) +#endif + { + tk1 = (uint32_t*)&roundKeys[0]; + for(int i=0;i<8;i++) + { + SKINNY_MAIN(); + } + } + + // 33th , ... ,40th or 49th, .... ,56th round + { + tk1 = (uint32_t*)&roundKeys[0]; + for(int i=0;i<4;i++) + { + SKINNY_MAIN(); + } + } +} + +#endif /* ___SKINNY_LOOP */ + +#endif /* ___ENABLE_DWORD_CAST */ + diff --git a/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/api.h b/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/api.h new file mode 100644 index 0000000..a4aa567 --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 diff --git a/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/encrypt.c b/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/encrypt.c new file mode 100644 index 0000000..f329721 --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/encrypt.c @@ -0,0 +1,1136 @@ +/* + * Date: 29 November 2018 + * Contact: Thomas Peyrin - thomas.peyrin@gmail.com + * Mustafa Khairallah - mustafam001@e.ntu.edu.sg + */ + +#include "crypto_aead.h" +#include "api.h" +#include "skinny.h" +#include +#include + +void pad (const unsigned char* m, unsigned char* mp, int len8) { + +#ifdef ___ENABLE_DWORD_CAST + + if (0 == len8) { + *(uint64_t*)(&mp[0]) = 0; + *(uint64_t*)(&mp[8]) = 0; + } else if (8 > len8) { + *(uint64_t*)(&mp[0]) = *(uint64_t*)(&m[0]) & (0xffffffffffffffff >> (64 - len8*8)); + *(uint64_t*)(&mp[8]) = 0; + mp[15] = len8; + } else if (8 == len8) { + *(uint64_t*)(&mp[0]) = *(uint64_t*)(&m[0]); + *(uint64_t*)(&mp[8]) = 0; + mp[15] = 8; + } else if (16 > len8) { + *(uint64_t*)(&mp[0]) = *(uint64_t*)(&m[0]); + *(uint64_t*)(&mp[8]) = *(uint64_t*)(&m[8]) & (0xffffffffffffffff >> (128 - len8*8)); + mp[15] = len8; + } else { + *(uint64_t*)(&mp[0]) = *(uint64_t*)(&m[0]); + *(uint64_t*)(&mp[8]) = *(uint64_t*)(&m[8]); + } + +#else + + if (0 == len8) { + *(uint32_t*)(&mp[0]) = 0; + *(uint32_t*)(&mp[4]) = 0; + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + } else if (4 > len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]) & (0xffffffff >> (32 - len8*8)); + *(uint32_t*)(&mp[4]) = 0; + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + mp[15] = len8; + } else if (4 == len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = 0; + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + mp[15] = 4; + } else if (8 > len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]) & (0xffffffff >> (64 - len8*8)); + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + mp[15] = len8; + } else if (8 == len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = 0; + *(uint32_t*)(&mp[12]) = 0; + mp[15] = 8; + } else if (12 > len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = *(uint32_t*)(&m[8]) & (0xffffffff >> (96 - len8*8)); + *(uint32_t*)(&mp[12]) = 0; + mp[15] = len8; + } else if (12 == len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = *(uint32_t*)(&m[8]); + *(uint32_t*)(&mp[12]) = 0; + mp[15] = 12; + } else if (16 > len8) { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + 
*(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = *(uint32_t*)(&m[8]); + *(uint32_t*)(&mp[12]) = *(uint32_t*)(&m[12]) & (0xffffffff >> (128 - len8*8)); + mp[15] = len8; + } else { + *(uint32_t*)(&mp[0]) = *(uint32_t*)(&m[0]); + *(uint32_t*)(&mp[4]) = *(uint32_t*)(&m[4]); + *(uint32_t*)(&mp[8]) = *(uint32_t*)(&m[8]); + *(uint32_t*)(&mp[12]) = *(uint32_t*)(&m[12]); + } + +#endif + +} + +void g8A (unsigned char* s, unsigned char* c) { + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t s0 = *(uint64_t*)(&s[0]); + uint64_t s1 = *(uint64_t*)(&s[8]); + + uint64_t c0, c1; + + c0 = ((s0 >> 1) & 0x7f7f7f7f7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x8080808080808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x8080808080808080); + + *(uint64_t*)(&c[0]) = c0; + *(uint64_t*)(&c[8]) = c1; + +#else + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t c0, c1, c2, c3; + + c0 = ((s0 >> 1) & 0x7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x80808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x80808080); + c2 = ((s2 >> 1) & 0x7f7f7f7f) ^ ((s2 ^ (s2 << 7)) & 0x80808080); + c3 = ((s3 >> 1) & 0x7f7f7f7f) ^ ((s3 ^ (s3 << 7)) & 0x80808080); + + *(uint32_t*)(&c[0]) = c0; + *(uint32_t*)(&c[4]) = c1; + *(uint32_t*)(&c[8]) = c2; + *(uint32_t*)(&c[12]) = c3; + +#endif + +} + +void g8A_for_Tag_Generation (unsigned char* s, unsigned char* c) { + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t s0 = *(uint64_t*)(&s[0]); + uint64_t s1 = *(uint64_t*)(&s[8]); + + uint64_t c0, c1; + + c0 = ((s0 >> 1) & 0x7f7f7f7f7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x8080808080808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x8080808080808080); + + // use byte access because of memory alignment. + // c is not always in word(4 byte) alignment. + c[0] = c0 &0xFF; + c[1] = (c0>>8) &0xFF; + c[2] = (c0>>16)&0xFF; + c[3] = (c0>>24)&0xFF; + c[4] = (c0>>32)&0xFF; + c[5] = (c0>>40)&0xFF; + c[6] = (c0>>48)&0xFF; + c[7] = c0>>56; + c[8] = c1 &0xFF; + c[9] = (c1>>8) &0xFF; + c[10] = (c1>>16)&0xFF; + c[11] = (c1>>24)&0xFF; + c[12] = (c1>>32)&0xFF; + c[13] = (c1>>40)&0xFF; + c[14] = (c1>>48)&0xFF; + c[15] = c1>>56; + +#else + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t c0, c1, c2, c3; + + c0 = ((s0 >> 1) & 0x7f7f7f7f) ^ ((s0 ^ (s0 << 7)) & 0x80808080); + c1 = ((s1 >> 1) & 0x7f7f7f7f) ^ ((s1 ^ (s1 << 7)) & 0x80808080); + c2 = ((s2 >> 1) & 0x7f7f7f7f) ^ ((s2 ^ (s2 << 7)) & 0x80808080); + c3 = ((s3 >> 1) & 0x7f7f7f7f) ^ ((s3 ^ (s3 << 7)) & 0x80808080); + + // use byte access because of memory alignment. + // c is not always in word(4 byte) alignment. 
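+  // The words are emitted least-significant byte first, so on an aligned
+  // little-endian target this produces the same 16 output bytes as the
+  // word stores used in g8A(); only the access width differs.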
+ c[0] = c0 &0xFF; + c[1] = (c0>>8) &0xFF; + c[2] = (c0>>16)&0xFF; + c[3] = c0>>24; + c[4] = c1 &0xFF; + c[5] = (c1>>8) &0xFF; + c[6] = (c1>>16)&0xFF; + c[7] = c1>>24; + c[8] = c2 &0xFF; + c[9] = (c2>>8) &0xFF; + c[10] = (c2>>16)&0xFF; + c[11] = c2>>24; + c[12] = c3 &0xFF; + c[13] = (c3>>8) &0xFF; + c[14] = (c3>>16)&0xFF; + c[15] = c3>>24; + +#endif + +} + +void rho_ad_eqov16 ( + const unsigned char* m, + unsigned char* s) { + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) ^= *(uint64_t*)(&m[0]); + *(uint64_t*)(&s[8]) ^= *(uint64_t*)(&m[8]); + +#else + + *(uint32_t*)(&s[0]) ^= *(uint32_t*)(&m[0]); + *(uint32_t*)(&s[4]) ^= *(uint32_t*)(&m[4]); + *(uint32_t*)(&s[8]) ^= *(uint32_t*)(&m[8]); + *(uint32_t*)(&s[12]) ^= *(uint32_t*)(&m[12]); + +#endif + +} + +void rho_ad_ud16 ( + const unsigned char* m, + unsigned char* s, + int len8) { + + unsigned char mp [16]; + pad(m,mp,len8); + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) ^= *(uint64_t*)(&mp[0]); + *(uint64_t*)(&s[8]) ^= *(uint64_t*)(&mp[8]); + +#else + + *(uint32_t*)(&s[0]) ^= *(uint32_t*)(&mp[0]); + *(uint32_t*)(&s[4]) ^= *(uint32_t*)(&mp[4]); + *(uint32_t*)(&s[8]) ^= *(uint32_t*)(&mp[8]); + *(uint32_t*)(&s[12]) ^= *(uint32_t*)(&mp[12]); + +#endif + +} + +void rho_eqov16 ( + const unsigned char* m, + unsigned char* c, + unsigned char* s) { + + g8A(s,c); + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t c0 = *(uint64_t*)(&c[0]); + uint64_t c1 = *(uint64_t*)(&c[8]); + + uint64_t s0 = *(uint64_t*)(&s[0]); + uint64_t s1 = *(uint64_t*)(&s[8]); + + uint64_t m0 = *(uint64_t*)(&m[0]); + uint64_t m1 = *(uint64_t*)(&m[8]); + + s0 ^= m0; + s1 ^= m1; + + c0 ^= m0; + c1 ^= m1; + + *(uint64_t*)(&s[0]) = s0; + *(uint64_t*)(&s[8]) = s1; + + *(uint64_t*)(&c[0]) = c0; + *(uint64_t*)(&c[8]) = c1; + +#else + + uint32_t c0 = *(uint32_t*)(&c[0]); + uint32_t c1 = *(uint32_t*)(&c[4]); + uint32_t c2 = *(uint32_t*)(&c[8]); + uint32_t c3 = *(uint32_t*)(&c[12]); + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t m0 = *(uint32_t*)(&m[0]); + uint32_t m1 = *(uint32_t*)(&m[4]); + uint32_t m2 = *(uint32_t*)(&m[8]); + uint32_t m3 = *(uint32_t*)(&m[12]); + + s0 ^= m0; + s1 ^= m1; + s2 ^= m2; + s3 ^= m3; + + c0 ^= m0; + c1 ^= m1; + c2 ^= m2; + c3 ^= m3; + + *(uint32_t*)(&s[0]) = s0; + *(uint32_t*)(&s[4]) = s1; + *(uint32_t*)(&s[8]) = s2; + *(uint32_t*)(&s[12]) = s3; + + *(uint32_t*)(&c[0]) = c0; + *(uint32_t*)(&c[4]) = c1; + *(uint32_t*)(&c[8]) = c2; + *(uint32_t*)(&c[12]) = c3; + +#endif + +} + +void rho_ud16 ( + const unsigned char* m, + unsigned char* c, + unsigned char* s, + int len8) { + + unsigned char mp [16]; + + pad(m,mp,len8); + + g8A(s,c); + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t mp0 = *(uint64_t*)&mp[0]; + uint64_t mp1 = *(uint64_t*)&mp[8]; + uint64_t c0 = *(uint64_t*)&c[0]; + uint64_t c1 = *(uint64_t*)&c[8]; + + *(uint64_t*)(&s[0]) ^= mp0; + *(uint64_t*)(&s[8]) ^= mp1; + + if (0 == len8) { + c0 = 0; + c1 = 0; + } else if (8 > len8) { + c0 = c0 ^ (mp0 & 0xffffffffffffffff >> (64 - (len8*8))); + c0 = c0 ^ (c0 & 0xffffffffffffffff << ( (len8*8))); + c1 = 0; + } else if (8 == len8) { + c0 = c0 ^ mp0; + c1 = 0; + } else if (16 > len8) { + len8 -= 8; + c0 = c0 ^ mp0; + c1 = c1 ^ (mp1 & 0xffffffffffffffff >> (64 - (len8*8))); + c1 = c1 ^ (c1 & 0xffffffffffffffff << ( (len8*8))); + } else { + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + } + + *(uint64_t*)&c[0] = c0; + *(uint64_t*)&c[8] = c1; + +#else + + uint32_t mp0 = *(uint32_t*)&mp[0]; + uint32_t mp1 = 
*(uint32_t*)&mp[4]; + uint32_t mp2 = *(uint32_t*)&mp[8]; + uint32_t mp3 = *(uint32_t*)&mp[12]; + uint32_t c0 = *(uint32_t*)&c[0]; + uint32_t c1 = *(uint32_t*)&c[4]; + uint32_t c2 = *(uint32_t*)&c[8]; + uint32_t c3 = *(uint32_t*)&c[12]; + + *(uint32_t*)(&s[0]) ^= mp0; + *(uint32_t*)(&s[4]) ^= mp1; + *(uint32_t*)(&s[8]) ^= mp2; + *(uint32_t*)(&s[12]) ^= mp3; + + if (0 == len8) { + c0 = 0; + c1 = 0; + c2 = 0; + c3 = 0; + } else if (4 > len8) { + c0 = c0 ^ (mp0 & 0xffffffff >> (32 - (len8*8))); + c0 = c0 ^ (c0 & 0xffffffff << ( (len8*8))); + c1 = 0; + c2 = 0; + c3 = 0; + } else if (4 == len8) { + c0 = c0 ^ mp0; + c1 = 0; + c2 = 0; + c3 = 0; + } else if (8 > len8) { + len8 -= 4; + c0 = c0 ^ mp0; + c1 = c1 ^ (mp1 & 0xffffffff >> (32 - (len8*8))); + c1 = c1 ^ (c1 & 0xffffffff << ( (len8*8))); + c2 = 0; + c3 = 0; + } else if (8 == len8) { + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = 0; + c3 = 0; + } else if (12 > len8) { + len8 -= 8; + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = c2 ^ (mp2 & 0xffffffff >> (32 - (len8*8))); + c2 = c2 ^ (c2 & 0xffffffff << ( (len8*8))); + c3 = 0; + } else if (12 == len8) { + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = c2 ^ mp2; + c3 = 0; + } else if (16 > len8) { + len8 -= 12; + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = c2 ^ mp2; + c3 = c3 ^ (mp3 & 0xffffffff >> (32 - (len8*8))); + c3 = c3 ^ (c3 & 0xffffffff << ( (len8*8))); + } else { + c0 = c0 ^ mp0; + c1 = c1 ^ mp1; + c2 = c2 ^ mp2; + c3 = c3 ^ mp3; + } + + *(uint32_t*)&c[0] = c0; + *(uint32_t*)&c[4] = c1; + *(uint32_t*)&c[8] = c2; + *(uint32_t*)&c[12] = c3; + +#endif + +} + +void irho_eqov16 ( + unsigned char* m, + const unsigned char* c, + unsigned char* s) { + + g8A(s,m); + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t c0 = *(uint64_t*)(&c[0]); + uint64_t c1 = *(uint64_t*)(&c[8]); + + uint64_t s0 = *(uint64_t*)(&s[0]); + uint64_t s1 = *(uint64_t*)(&s[8]); + + uint64_t m0 = *(uint64_t*)(&m[0]); + uint64_t m1 = *(uint64_t*)(&m[8]); + + s0 ^= c0 ^ m0; + s1 ^= c1 ^ m1; + + m0 ^= c0; + m1 ^= c1; + + *(uint64_t*)(&s[0]) = s0; + *(uint64_t*)(&s[8]) = s1; + + *(uint64_t*)(&m[0]) = m0; + *(uint64_t*)(&m[8]) = m1; + +#else + + uint32_t c0 = *(uint32_t*)(&c[0]); + uint32_t c1 = *(uint32_t*)(&c[4]); + uint32_t c2 = *(uint32_t*)(&c[8]); + uint32_t c3 = *(uint32_t*)(&c[12]); + + uint32_t s0 = *(uint32_t*)(&s[0]); + uint32_t s1 = *(uint32_t*)(&s[4]); + uint32_t s2 = *(uint32_t*)(&s[8]); + uint32_t s3 = *(uint32_t*)(&s[12]); + + uint32_t m0 = *(uint32_t*)(&m[0]); + uint32_t m1 = *(uint32_t*)(&m[4]); + uint32_t m2 = *(uint32_t*)(&m[8]); + uint32_t m3 = *(uint32_t*)(&m[12]); + + s0 ^= c0 ^ m0; + s1 ^= c1 ^ m1; + s2 ^= c2 ^ m2; + s3 ^= c3 ^ m3; + + m0 ^= c0; + m1 ^= c1; + m2 ^= c2; + m3 ^= c3; + + *(uint32_t*)(&s[0]) = s0; + *(uint32_t*)(&s[4]) = s1; + *(uint32_t*)(&s[8]) = s2; + *(uint32_t*)(&s[12]) = s3; + + *(uint32_t*)(&m[0]) = m0; + *(uint32_t*)(&m[4]) = m1; + *(uint32_t*)(&m[8]) = m2; + *(uint32_t*)(&m[12]) = m3; + +#endif + +} + +void irho_ud16 ( + unsigned char* m, + const unsigned char* c, + unsigned char* s, + int len8) { + + unsigned char cp [16]; + + pad(c,cp,len8); + + g8A(s,m); + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t cp0 = *(uint64_t*)&cp[0]; + uint64_t cp1 = *(uint64_t*)&cp[8]; + uint64_t m0 = *(uint64_t*)&m[0]; + uint64_t m1 = *(uint64_t*)&m[8]; + uint64_t s0 = *(uint64_t*)&s[0]; + uint64_t s1 = *(uint64_t*)&s[8]; + + s0 ^= cp0; + s1 ^= cp1; + + if (0 == len8) { + m0 = 0; + m1 = 0; + } else if (8 > len8) { + s0 = s0 ^ (m0 & 0xffffffffffffffff >> (64 - (len8*8))); + + m0 = m0 ^ (cp0 & 0xffffffffffffffff >> (64 - (len8*8))); + m0 = 
m0 ^ (m0 & 0xffffffffffffffff << ( (len8*8))); + m1 = 0; + } else if (8 == len8) { + s0 = s0 ^ m0; + + m0 = m0 ^ cp0; + m1 = 0; + } else if (16 > len8) { + len8 -= 8; + s0 = s0 ^ m0; + s1 = s1 ^ (m1 & 0xffffffffffffffff >> (64 - (len8*8))); + + m0 = m0 ^ cp0; + m1 = m1 ^ (cp1 & 0xffffffffffffffff >> (64 - (len8*8))); + m1 = m1 ^ (m1 & 0xffffffffffffffff << ( (len8*8))); + } else { + s0 = s0 ^ m0; + s1 = s1 ^ m1; + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + } + + *(uint64_t*)&s[0] = s0; + *(uint64_t*)&s[8] = s1; + *(uint64_t*)&m[0] = m0; + *(uint64_t*)&m[8] = m1; + +#else + + uint32_t cp0 = *(uint32_t*)&cp[0]; + uint32_t cp1 = *(uint32_t*)&cp[4]; + uint32_t cp2 = *(uint32_t*)&cp[8]; + uint32_t cp3 = *(uint32_t*)&cp[12]; + uint32_t m0 = *(uint32_t*)&m[0]; + uint32_t m1 = *(uint32_t*)&m[4]; + uint32_t m2 = *(uint32_t*)&m[8]; + uint32_t m3 = *(uint32_t*)&m[12]; + uint32_t s0 = *(uint32_t*)&s[0]; + uint32_t s1 = *(uint32_t*)&s[4]; + uint32_t s2 = *(uint32_t*)&s[8]; + uint32_t s3 = *(uint32_t*)&s[12]; + + s0 ^= cp0; + s1 ^= cp1; + s2 ^= cp2; + s3 ^= cp3; + + if (0 == len8) { + m0 = 0; + m1 = 0; + m2 = 0; + m3 = 0; + } else if (4 > len8) { + s0 = s0 ^ (m0 & 0xffffffff >> (32 - (len8*8))); + + m0 = m0 ^ (cp0 & 0xffffffff >> (32 - (len8*8))); + m0 = m0 ^ (m0 & 0xffffffff << ( (len8*8))); + m1 = 0; + m2 = 0; + m3 = 0; + } else if (4 == len8) { + s0 = s0 ^ m0; + + m0 = m0 ^ cp0; + m1 = 0; + m2 = 0; + m3 = 0; + } else if (8 > len8) { + len8 -= 4; + s0 = s0 ^ m0; + s1 = s1 ^ (m1 & 0xffffffff >> (32 - (len8*8))); + + m0 = m0 ^ cp0; + m1 = m1 ^ (cp1 & 0xffffffff >> (32 - (len8*8))); + m1 = m1 ^ (m1 & 0xffffffff << ( (len8*8))); + m2 = 0; + m3 = 0; + } else if (8 == len8) { + s0 = s0 ^ m0; + s1 = s1 ^ m1; + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + m2 = 0; + m3 = 0; + } else if (12 > len8) { + len8 -= 8; + s0 = s0 ^ m0; + s1 = s1 ^ m1; + s2 = s2 ^ (m2 & 0xffffffff >> (32 - (len8*8))); + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + m2 = m2 ^ (cp2 & 0xffffffff >> (32 - (len8*8))); + m2 = m2 ^ (m2 & 0xffffffff << ( (len8*8))); + m3 = 0; + } else if (12 == len8) { + s0 = s0 ^ m0; + s1 = s1 ^ m1; + s2 = s2 ^ m2; + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + m2 = m2 ^ cp2; + m3 = 0; + } else if (16 > len8) { + len8 -= 12; + s0 = s0 ^ m0; + s1 = s1 ^ m1; + s2 = s2 ^ m2; + s3 = s3 ^ (m3 & 0xffffffff >> (32 - (len8*8))); + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + m2 = m2 ^ cp2; + m3 = m3 ^ (cp3 & 0xffffffff >> (32 - (len8*8))); + m3 = m3 ^ (m3 & 0xffffffff << ( (len8*8))); + } else { + s0 = s0 ^ m0; + s1 = s1 ^ m1; + s2 = s2 ^ m2; + s3 = s3 ^ m3; + + m0 = m0 ^ cp0; + m1 = m1 ^ cp1; + m2 = m2 ^ cp2; + m3 = m3 ^ cp3; + } + + *(uint32_t*)&s[0] = s0; + *(uint32_t*)&s[4] = s1; + *(uint32_t*)&s[8] = s2; + *(uint32_t*)&s[12] = s3; + *(uint32_t*)&m[0] = m0; + *(uint32_t*)&m[4] = m1; + *(uint32_t*)&m[8] = m2; + *(uint32_t*)&m[12] = m3; + +#endif + +} + +void reset_lfsr_gf56 (unsigned char* CNT) { + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&CNT[0]) = 0x0000000000000001; // CNT7 CNT6 CNT5 CNT4 CNT3 CNT2 CNT1 CNT0 + +#else + + *(uint32_t*)(&CNT[0]) = 0x00000001; // CNT3 CNT2 CNT1 CNT0 + *(uint32_t*)(&CNT[4]) = 0x00000000; // CNT7 CNT6 CNT5 CNT4 + +#endif + +} + +void lfsr_gf56 (unsigned char* CNT) { + +#ifdef ___ENABLE_DWORD_CAST + + uint64_t C0; + uint64_t fb0; + + C0 = *(uint64_t*)(&CNT[0]); // CNT7 CNT6 CNT5 CNT4 CNT3 CNT2 CNT1 CNT0 + + fb0 = 0; + if (CNT[6] & 0x80) { + fb0 = 0x95; + } + + C0 = C0 << 1 ^ fb0; + + *(uint64_t*)(&CNT[0]) = C0; + +#else + + uint32_t C0; + uint32_t C1; + uint32_t fb0; + + C0 = *(uint32_t*)(&CNT[0]); // CNT3 CNT2 CNT1 CNT0 + 
C1 = *(uint32_t*)(&CNT[4]); // CNT7 CNT6 CNT5 CNT4 + + fb0 = 0; + if (CNT[6] & 0x80) { + fb0 = 0x95; + } + + C1 = C1 << 1 | C0 >> 31; + C0 = C0 << 1 ^ fb0; + + *(uint32_t*)(&CNT[0]) = C0; + *(uint32_t*)(&CNT[4]) = C1; + +#endif + +} + +void block_cipher( + unsigned char* s, + const unsigned char* k, unsigned char* T, + unsigned char* CNT, unsigned char D, + skinny_ctrl* p_skinny_ctrl) { + + CNT[7] = D; + p_skinny_ctrl->func_skinny_128_384_enc(s, p_skinny_ctrl, CNT, T, k); + +} + +void nonce_encryption ( + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + skinny_ctrl* p_skinny_ctrl) { + + block_cipher(s,k,(unsigned char*)N,CNT,D,p_skinny_ctrl); + +} + +void generate_tag ( + unsigned char** c, unsigned char* s, + unsigned long long* clen) { + + g8A_for_Tag_Generation(s, *c); + + *c = *c + 16; + *c = *c - *clen; + +} + +unsigned long long msg_encryption_eqov16 ( + const unsigned char** M, unsigned char** c, + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long mlen, + skinny_ctrl* p_skinny_ctrl) { + + rho_eqov16(*M, *c, s); + *c = *c + 16; + *M = *M + 16; + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,D,p_skinny_ctrl); + return mlen - 16; + +} + +unsigned long long msg_encryption_ud16 ( + const unsigned char** M, unsigned char** c, + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long mlen, + skinny_ctrl* p_skinny_ctrl) { + +// char msg[64]; +// +// unsigned int st = (unsigned int )read_cycle(); + + rho_ud16(*M, *c, s, mlen); + +// unsigned int ed = (unsigned int )read_cycle(); +// sprintf(msg, "rho_ud16 %d\n", ed-st); +// SerialPuts(msg); +// +// fprint_bstr(NULL, "c = ", *c, 16); + + *c = *c + mlen; + *M = *M + mlen; + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,D,p_skinny_ctrl); + return 0; + +} + +unsigned long long msg_decryption_eqov16 ( + unsigned char** M, const unsigned char** c, + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long clen, + skinny_ctrl* p_skinny_ctrl) { + + irho_eqov16(*M, *c, s); + *c = *c + 16; + *M = *M + 16; + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,D,p_skinny_ctrl); + + return clen - 16; + +} + +unsigned long long msg_decryption_ud16 ( + unsigned char** M, const unsigned char** c, + const unsigned char* N, + unsigned char* CNT, + unsigned char*s, const unsigned char* k, + unsigned char D, + unsigned long long clen, + skinny_ctrl* p_skinny_ctrl) { + + irho_ud16(*M, *c, s, clen); + *c = *c + clen; + *M = *M + clen; + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,D,p_skinny_ctrl); + return 0; + +} + +unsigned long long ad_encryption_eqov32 ( + const unsigned char** A, unsigned char* s, + const unsigned char* k, unsigned long long adlen, + unsigned char* CNT, + unsigned char D, + skinny_ctrl* p_skinny_ctrl) { + + unsigned char T [16]; + + rho_ad_eqov16(*A, s); + *A = *A + 16; + lfsr_gf56(CNT); + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&T[0]) = *(uint64_t*)(&(*A)[0]); + *(uint64_t*)(&T[8]) = *(uint64_t*)(&(*A)[8]); + +#else + + *(uint32_t*)(&T[0]) = *(uint32_t*)(&(*A)[0]); + *(uint32_t*)(&T[4]) = *(uint32_t*)(&(*A)[4]); + *(uint32_t*)(&T[8]) = *(uint32_t*)(&(*A)[8]); + *(uint32_t*)(&T[12]) = *(uint32_t*)(&(*A)[12]); + +#endif + + *A = *A + 16; + block_cipher(s,k,T,CNT,D,p_skinny_ctrl); + lfsr_gf56(CNT); + + return adlen - 32; + +} + +unsigned long long ad_encryption_ov16 
( + const unsigned char** A, unsigned char* s, + const unsigned char* k, unsigned long long adlen, + unsigned char* CNT, + unsigned char D, + skinny_ctrl* p_skinny_ctrl) { + + unsigned char T [16]; + + adlen = adlen - 16; + rho_ad_eqov16(*A, s); + *A = *A + 16; + lfsr_gf56(CNT); + + pad(*A, T, adlen); + *A = *A + adlen; + block_cipher(s,k,T,CNT,D,p_skinny_ctrl); + lfsr_gf56(CNT); + + return 0; + +} + +unsigned long long ad_encryption_eq16 ( + const unsigned char** A, unsigned char* s, + unsigned char* CNT) { + + rho_ad_eqov16(*A, s); + *A = *A + 16; + lfsr_gf56(CNT); + + return 0; + +} + +unsigned long long ad_encryption_ud16( + const unsigned char** A, unsigned char* s, + unsigned long long adlen, + unsigned char* CNT) { + + rho_ad_ud16(*A, s, adlen); + *A = *A + adlen; + lfsr_gf56(CNT); + + return 0; + +} + +int crypto_aead_encrypt ( + unsigned char* c, unsigned long long* clen, + const unsigned char* m, unsigned long long mlen, + const unsigned char* ad, unsigned long long adlen, + const unsigned char* nsec, + const unsigned char* npub, + const unsigned char* k) { + + unsigned char s[16]; + unsigned char CNT[8]; + const unsigned char* A; + const unsigned char* M; + const unsigned char* N; + + skinny_ctrl ctrl; + ctrl.func_skinny_128_384_enc = skinny_128_384_enc123_12; + + (void) nsec; + A = ad; + M = m; + N = npub; + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) = 0; + *(uint64_t*)(&s[8]) = 0; + +#else + + *(uint32_t*)(&s[0]) = 0; + *(uint32_t*)(&s[4]) = 0; + *(uint32_t*)(&s[8]) = 0; + *(uint32_t*)(&s[12]) = 0; + +#endif + + reset_lfsr_gf56(CNT); + + if (adlen == 0) { // AD is an empty string + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,0x1a,&ctrl); + } + else while (adlen > 0) { + if (adlen < 16) { // The last block of AD is odd and incomplete + adlen = ad_encryption_ud16(&A,s,adlen,CNT); + nonce_encryption(N,CNT,s,k,0x1a,&ctrl); + } + else if (adlen == 16) { // The last block of AD is odd and complete + adlen = ad_encryption_eq16(&A,s,CNT); + nonce_encryption(N,CNT,s,k,0x18,&ctrl); + } + else if (adlen < 32) { // The last block of AD is even and incomplete + adlen = ad_encryption_ov16(&A,s,k,adlen,CNT,0x08,&ctrl); + nonce_encryption(N,CNT,s,k,0x1a,&ctrl); + } + else if (adlen == 32) { // The last block of AD is even and complete + adlen = ad_encryption_eqov32(&A,s,k,adlen,CNT,0x08,&ctrl); + nonce_encryption(N,CNT,s,k,0x18,&ctrl); + } + else { // A normal full pair of blocks of AD + adlen = ad_encryption_eqov32(&A,s,k,adlen,CNT,0x08,&ctrl); + } + } + + ctrl.func_skinny_128_384_enc = skinny_128_384_enc1_1; + + reset_lfsr_gf56(CNT); + + *clen = mlen + 16; + + if (mlen == 0) { // M is an empty string + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,0x15,&ctrl); + } + else while (mlen > 0) { + if (mlen < 16) { // The last block of M is incomplete + mlen = msg_encryption_ud16(&M,&c,N,CNT,s,k,0x15,mlen,&ctrl); + } + else if (mlen == 16) { // The last block of M is complete + mlen = msg_encryption_eqov16(&M,&c,N,CNT,s,k,0x14,mlen,&ctrl); + } + else { // A normal full message block + mlen = msg_encryption_eqov16(&M,&c,N,CNT,s,k,0x04,mlen,&ctrl); + } + } + + // Tag generation + generate_tag(&c,s,clen); + + return 0; + +} + +int crypto_aead_decrypt( + unsigned char *m,unsigned long long *mlen, + unsigned char *nsec, + const unsigned char *c,unsigned long long clen, + const unsigned char *ad,unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k) { + + unsigned char s[16]; + unsigned char T[16]; + unsigned char CNT[8]; + const unsigned char* A; + unsigned char* M; 
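+  // Decryption working state: A walks the associated data, M receives the
+  // recovered plaintext in place, N is the 16-byte nonce, CNT is the
+  // lfsr_gf56() block counter and T holds the recomputed tag for comparison.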
+ const unsigned char* N; + + skinny_ctrl ctrl; + ctrl.func_skinny_128_384_enc = skinny_128_384_enc123_12; + + (void) nsec; + A = ad; + M = m; + N = npub; + +#ifdef ___ENABLE_DWORD_CAST + + *(uint64_t*)(&s[0]) = 0; + *(uint64_t*)(&s[8]) = 0; + +#else + + *(uint32_t*)(&s[0]) = 0; + *(uint32_t*)(&s[4]) = 0; + *(uint32_t*)(&s[8]) = 0; + *(uint32_t*)(&s[12]) = 0; + +#endif + + reset_lfsr_gf56(CNT); + + if (adlen == 0) { // AD is an empty string + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,0x1a,&ctrl); + } + else while (adlen > 0) { + if (adlen < 16) { // The last block of AD is odd and incomplete + adlen = ad_encryption_ud16(&A,s,adlen,CNT); + nonce_encryption(N,CNT,s,k,0x1a,&ctrl); + } + else if (adlen == 16) { // The last block of AD is odd and complete + adlen = ad_encryption_eq16(&A,s,CNT); + nonce_encryption(N,CNT,s,k,0x18,&ctrl); + } + else if (adlen < 32) { // The last block of AD is even and incomplete + adlen = ad_encryption_ov16(&A,s,k,adlen,CNT,0x08,&ctrl); + nonce_encryption(N,CNT,s,k,0x1a,&ctrl); + } + else if (adlen == 32) { // The last block of AD is even and complete + adlen = ad_encryption_eqov32(&A,s,k,adlen,CNT,0x08,&ctrl); + nonce_encryption(N,CNT,s,k,0x18,&ctrl); + } + else { // A normal full pair of blocks of AD + adlen = ad_encryption_eqov32(&A,s,k,adlen,CNT,0x08,&ctrl); + } + } + + ctrl.func_skinny_128_384_enc = skinny_128_384_enc1_1; + + reset_lfsr_gf56(CNT); + + clen = clen -16; + *mlen = clen; + + if (clen == 0) { // C is an empty string + lfsr_gf56(CNT); + nonce_encryption(N,CNT,s,k,0x15,&ctrl); + } + else while (clen > 0) { + if (clen < 16) { // The last block of C is incomplete + clen = msg_decryption_ud16(&M,&c,N,CNT,s,k,0x15,clen,&ctrl); + } + else if (clen == 16) { // The last block of C is complete + clen = msg_decryption_eqov16(&M,&c,N,CNT,s,k,0x14,clen,&ctrl); + } + else { // A normal full message block + clen = msg_decryption_eqov16(&M,&c,N,CNT,s,k,0x04,clen,&ctrl); + } + } + + // Tag generation + g8A_for_Tag_Generation(s, T); + + for (int i = 0; i < 16; i++) { + if (T[i] != (*(c+i))) { + return -1; + } + } + + return 0; + +} diff --git a/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/skinny.h b/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/skinny.h new file mode 100644 index 0000000..826f2f8 --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/skinny.h @@ -0,0 +1,106 @@ +#define ___SKINNY_LOOP +#define ___NUM_OF_ROUNDS_56 +#if (defined(__riscv_xlen) && (__riscv_xlen == 64)) +#define ___ENABLE_DWORD_CAST +#endif + +#include + +typedef struct ___skinny_ctrl { +#ifdef ___NUM_OF_ROUNDS_56 + unsigned char roundKeys[960]; // number of rounds : 56 +#else + unsigned char roundKeys[704]; // number of rounds : 40 +#endif + void (*func_skinny_128_384_enc)(unsigned char*, struct ___skinny_ctrl*, unsigned char* CNT, unsigned char* T, const unsigned char* K); +} skinny_ctrl; + +extern void skinny_128_384_enc123_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K); +extern void skinny_128_384_enc12_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K); +extern void skinny_128_384_enc1_1 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K); + +#define pack_word(x0, x1, x2, x3, w) \ + w = ((x3) << 24) ^ \ + ((x2) << 16) ^ \ + ((x1) << 8) ^ \ + (x0); + +#define unpack_word(x0, x1, x2, x3, w) \ + x0 = ((w) & 0xff); \ + x1 = (((w) >> 8) & 0xff); \ + 
x2 = (((w) >> 16) & 0xff); \ + x3 = ((w) >> 24); + +#ifdef ___ENABLE_DWORD_CAST + +#define PERMUTATION() \ +/* permutation */ \ + \ + /* 7 6 5 4 3 2 1 0 */ \ + /* 5 7 2 3 6 0 4 1 */ \ + \ + /* dw (7 6 5 4 3 2 1 0) */ \ + \ + /* dw (5 7 2 3 6 0 4 1) */ \ + \ + dt0 = dw >> 24; /* - - - 7 6 5 4 3 */ \ + dt0 = dt0 & 0x00000000ff00ff00; /* - - - - 6 - 4 - */ \ + \ + dt1 = dw << 16; /* 5 4 3 2 1 0 - - */ \ + dt1 = dt1 & 0xff00000000ff0000; /* 5 - - - - 0 - - */ \ + dt0 = dt0 ^ dt1; /* 5 - - - 6 0 4 - */ \ + \ + dt1 = dw >> 8; /* - 7 6 5 4 3 2 1 */ \ + dt1 = dt1 & 0x00ff0000000000ff; /* - 7 - - - - - 1 */ \ + dt0 = dt0 ^ dt1; /* 5 7 - - 6 0 4 1 */ \ + \ + dt1 = dw << 8; /* 6 5 4 3 2 1 0 - */ \ + dt1 = dt1 & 0x000000ff00000000; /* - - - 3 - - - - */ \ + dt0 = dt0 ^ dt1; /* 5 7 - 3 6 0 4 1 */ \ + \ + dt1 = dw << 24; /* 4 3 2 1 0 - - - */ \ + dw = dt1 & 0x0000ff0000000000; /* - - 2 - - - - - */ \ + dw = dw ^ dt0; /* 5 7 2 3 6 0 4 1 */ + +#else + +#define PERMUTATION() \ +/* permutation */ \ + \ + /* 7 6 5 4 3 2 1 0 */ \ + /* 5 7 2 3 6 0 4 1 */ \ + \ + /* w0 (3 2 1 0) */ \ + /* w1 (7 6 5 4) */ \ + \ + /* w0 (6 0 4 1) */ \ + /* w1 (5 7 2 3) */ \ + \ + t0 = w1 << 8; /* 6 5 4 - */ \ + t0 = t0 & 0xff00ff00; /* 6 - 4 - */ \ + \ + t1 = w1 << 16; /* 5 4 - - */ \ + t1 = t1 & 0xff000000; /* 5 - - - */ \ + \ + t2 = w1 & 0xff000000; /* 7 - - - */ \ + t2 = t2 >> 8; /* - 7 - - */ \ + t1 = t1 ^ t2; /* 5 7 - - */ \ + \ + t2 = w0 & 0xff000000; /* 3 - - - */ \ + t2 = t2 >> 24; /* - - - 3 */ \ + t1 = t1 ^ t2; /* 5 7 - 3 */ \ + \ + w1 = w0 >> 8; /* - 3 2 1 */ \ + w1 = w1 & 0x0000ff00; /* - - 2 - */ \ + w1 = w1 ^ t1; /* 5 7 2 3 */ \ + \ + t2 = w0 & 0x0000ff00; /* - - 1 - */ \ + t2 = t2 >> 8; /* - - - 1 */ \ + t0 = t0 ^ t2; /* 6 - 4 1 */ \ + \ + w0 = w0 << 16; /* 1 0 - - */ \ + w0 = w0 & 0x00ff0000; /* - 0 - - */ \ + w0 = w0 ^ t0; /* 6 0 4 1 */ + +#endif + diff --git a/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/skinny_key_schedule2.c b/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/skinny_key_schedule2.c new file mode 100644 index 0000000..c2f30de --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/skinny_key_schedule2.c @@ -0,0 +1,431 @@ +/****************************************************************************** + * Copyright (c) 2020, NEC Corporation. + * + * THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND. 
+ * + *****************************************************************************/ + +/* + * SKINNY-128-384 + * + * load * AC(c0 c1) ^ TK3 + * calc AC(c0 c1) ^ TK2 -> store + * ART(TK2) + * + * number of rounds : 40 or 56 + */ + +#include "skinny.h" + +#ifdef ___ENABLE_DWORD_CAST + +#define PERMUTATION_TK2() \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK2) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x6 x5 x4 x3 x2 x1 x0 x7^x5) */ \ + dw = ((dw << 1) & 0xfefefefefefefefe) ^ \ + (((dw >> 7) ^ (dw >> 5)) & 0x0101010101010101); \ + \ + /* Load TK3 */ \ + /* TK2^TK3^AC(c0 c1) */ \ + /* store */ \ + *tk2 = dw ^ *tk3; \ + tk2 += 2; \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys) +{ + uint64_t* tk2; // used in MACRO + uint64_t* tk3; // used in MACRO + uint64_t dt0; // used in MACRO + uint64_t dt1; // used in MACRO + uint64_t dw; + + // odd + + // load master key + // load master key + dw = *(uint64_t*)&roundKeys[16]; + + tk2 = (uint64_t*)&roundKeys[64]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[384]; +#else + tk3 = (uint64_t*)&roundKeys[512]; +#endif + + // 1st round + *tk2 = dw ^ *tk3; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... ,37th,39th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41th,43th, ... ,51th,53th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + + // even + + // load master key + dw = *(uint64_t*)&roundKeys[24]; + + tk2 = (uint64_t*)&roundKeys[72]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[392]; +#else + tk3 = (uint64_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... ,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... ,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys) +{ + uint64_t* tk2; // used in MACRO + uint64_t* tk3; // used in MACRO + uint64_t dt0; // used in MACRO + uint64_t dt1; // used in MACRO + uint64_t dw; + + // odd + + // load master key + dw = *(uint64_t*)&roundKeys[16]; + + tk2 = (uint64_t*)&roundKeys[64]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[384]; +#else + tk3 = (uint64_t*)&roundKeys[512]; +#endif + + // 1st round + *tk2 = dw ^ *tk3; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... 
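+  // One PERMUTATION_TK2() per remaining odd round: permute the TK2 state,
+  // clock its LFSR once, and store TK2^TK3 for that round. 19 iterations
+  // cover rounds 3,5,...,39; 27 cover rounds 3,5,...,55.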
+#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + PERMUTATION_TK2(); + } + + // even + + // load master key + dw = *(uint64_t*)&roundKeys[24]; + + tk2 = (uint64_t*)&roundKeys[72]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[392]; +#else + tk3 = (uint64_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + PERMUTATION_TK2(); + } + +} + +#endif /* ___SKINNY_LOOP */ + +#else /* ___ENABLE_DWORD_CAST */ + +#define PERMUTATION_TK2() \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK2) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x6 x5 x4 x3 x2 x1 x0 x7^x5) */ \ + w0 = ((w0 << 1) & 0xfefefefe) ^ \ + (((w0 >> 7) ^ (w0 >> 5)) & 0x01010101); \ + w1 = ((w1 << 1) & 0xfefefefe) ^ \ + (((w1 >> 7) ^ (w1 >> 5)) & 0x01010101); \ + \ + /* Load TK3 */ \ + /* TK2^TK3^AC(c0 c1) */ \ + /* store */ \ + *tk2++ = w0 ^ *tk3++; \ + *tk2++ = w1 ^ *tk3++; \ + tk2 += 2; \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys) +{ + uint32_t* tk2; // used in MACRO + uint32_t* tk3; // used in MACRO + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + + // odd + + // load master key + w0 = *(uint32_t*)&roundKeys[16]; + w1 = *(uint32_t*)&roundKeys[20]; + + tk2 = (uint32_t*)&roundKeys[64]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[384]; +#else + tk3 = (uint32_t*)&roundKeys[512]; +#endif + + // 1st round + *tk2++ = w0 ^ *tk3++; + *tk2++ = w1 ^ *tk3++; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... ,37th,39th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41th,43th, ... ,51th,53th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + + // even + + // load master key + w0 = *(uint32_t*)&roundKeys[24]; + w1 = *(uint32_t*)&roundKeys[28]; + + tk2 = (uint32_t*)&roundKeys[72]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[392]; +#else + tk3 = (uint32_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... ,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... 
,54th,56th round + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + PERMUTATION_TK2(); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys) +{ + uint32_t* tk2; // used in MACRO + uint32_t* tk3; // used in MACRO + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + + // odd + + // load master key + w0 = *(uint32_t*)&roundKeys[16]; + w1 = *(uint32_t*)&roundKeys[20]; + + tk2 = (uint32_t*)&roundKeys[64]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[384]; +#else + tk3 = (uint32_t*)&roundKeys[512]; +#endif + + // 1st round + *tk2++ = w0 ^ *tk3++; + *tk2++ = w1 ^ *tk3++; + + tk2 += 2; + tk3 += 2; + + // 3rd,5th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + PERMUTATION_TK2(); + } + + // even + + // load master key + w0 = *(uint32_t*)&roundKeys[24]; + w1 = *(uint32_t*)&roundKeys[28]; + + tk2 = (uint32_t*)&roundKeys[72]; +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[392]; +#else + tk3 = (uint32_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + PERMUTATION_TK2(); + } + +} + +#endif /* ___SKINNY_LOOP */ + +#endif /* ___ENABLE_DWORD_CAST */ + diff --git a/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/skinny_key_schedule3.c b/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/skinny_key_schedule3.c new file mode 100644 index 0000000..5dcaf7f --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/skinny_key_schedule3.c @@ -0,0 +1,428 @@ +/****************************************************************************** + * Copyright (c) 2020, NEC Corporation. + * + * THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND. + * + *****************************************************************************/ + +/* + * SKINNY-128-384 + * + * AC(c0 c1) ^ TK3 -> store + * ART(TK3) + * + * number of rounds : 40 or 56 + */ + +#include "skinny.h" + +#ifdef ___ENABLE_DWORD_CAST + +#define PERMUTATION_TK3(c0Val, c1Val) \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK3) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x0^x6 x7 x6 x5 x4 x3 x2 x1) */ \ + dw = ((dw >> 1) & 0x7f7f7f7f7f7f7f7f) ^ \ + (((dw << 7) ^ (dw << 1)) & 0x8080808080808080); \ + \ + /* K3^AC(c0 c1) */ \ + /* store */ \ + dt0 = dw ^ c0Val; \ + *tk3 = dt0 ^ ((uint64_t)c1Val << 40); \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys) +{ + uint64_t *tk3; + uint64_t dt0; // used in MACRO + uint64_t dt1; // used in MACRO + uint64_t dw; + + // odd + + // load master key + dw = *(uint64_t*)&roundKeys[32]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[384]; +#else + tk3 = (uint64_t*)&roundKeys[512]; +#endif + + // 1st round + *tk3++ = dw ^ 0x01; + tk3 += 1; + + // 3rd,5th, ... 
,37th,39th round + PERMUTATION_TK3(0x7, 0x0); + PERMUTATION_TK3(0xf, 0x1); + PERMUTATION_TK3(0xd, 0x3); + PERMUTATION_TK3(0x7, 0x3); + PERMUTATION_TK3(0xe, 0x1); + PERMUTATION_TK3(0x9, 0x3); + PERMUTATION_TK3(0x7, 0x2); + PERMUTATION_TK3(0xd, 0x1); + PERMUTATION_TK3(0x5, 0x3); + + PERMUTATION_TK3(0x6, 0x1); + PERMUTATION_TK3(0x8, 0x1); + PERMUTATION_TK3(0x1, 0x2); + PERMUTATION_TK3(0x5, 0x0); + PERMUTATION_TK3(0x7, 0x1); + PERMUTATION_TK3(0xc, 0x1); + PERMUTATION_TK3(0x1, 0x3); + PERMUTATION_TK3(0x6, 0x0); + PERMUTATION_TK3(0xb, 0x1); + PERMUTATION_TK3(0xd, 0x2); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41td,43th, ... ,53th,55th round + PERMUTATION_TK3(0x4, 0x3); + PERMUTATION_TK3(0x2, 0x1); + PERMUTATION_TK3(0x8, 0x0); + PERMUTATION_TK3(0x2, 0x2); + PERMUTATION_TK3(0x9, 0x0); + PERMUTATION_TK3(0x6, 0x2); + PERMUTATION_TK3(0x9, 0x1); + PERMUTATION_TK3(0x5, 0x2); + +#endif + + // even + + // load master key + dw = *(uint64_t*)&roundKeys[40]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[392]; +#else + tk3 = (uint64_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... ,38th,40th round + PERMUTATION_TK3(0x3, 0x0); + PERMUTATION_TK3(0xf, 0x0); + PERMUTATION_TK3(0xe, 0x3); + PERMUTATION_TK3(0xb, 0x3); + PERMUTATION_TK3(0xf, 0x2); + PERMUTATION_TK3(0xc, 0x3); + PERMUTATION_TK3(0x3, 0x3); + PERMUTATION_TK3(0xe, 0x0); + PERMUTATION_TK3(0xa, 0x3); + PERMUTATION_TK3(0xb, 0x2); + + PERMUTATION_TK3(0xc, 0x2); + PERMUTATION_TK3(0x0, 0x3); + PERMUTATION_TK3(0x2, 0x0); + PERMUTATION_TK3(0xb, 0x0); + PERMUTATION_TK3(0xe, 0x2); + PERMUTATION_TK3(0x8, 0x3); + PERMUTATION_TK3(0x3, 0x2); + PERMUTATION_TK3(0xd, 0x0); + PERMUTATION_TK3(0x6, 0x3); + PERMUTATION_TK3(0xa, 0x1); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... ,54th,56th round + PERMUTATION_TK3(0x9, 0x2); + PERMUTATION_TK3(0x4, 0x2); + PERMUTATION_TK3(0x1, 0x1); + PERMUTATION_TK3(0x4, 0x0); + PERMUTATION_TK3(0x3, 0x1); + PERMUTATION_TK3(0xc, 0x0); + PERMUTATION_TK3(0x2, 0x3); + PERMUTATION_TK3(0xa, 0x0); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys, unsigned char *pRC) +{ + uint64_t *tk3; + uint64_t dt0; // used in MACRO + uint64_t dt1; // used in MACRO + uint64_t dw; + uint64_t c0; + uint64_t c1; + + // odd + + // load master key + dw = *(uint64_t*)&roundKeys[32]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint64_t*)&roundKeys[384]; +#else + tk3 = (uint64_t*)&roundKeys[512]; +#endif + + // 1st round + *tk3++ = dw ^ 0x01; + tk3 += 1; + + pRC += 4; + // 3rd,5th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + + // even + + // load master key + dw = *(uint64_t*)&roundKeys[40]; + +#ifndef ___NUM_OF_ROUNDS_56 + pRC -= 78; + tk3 = (uint64_t*)&roundKeys[392]; +#else + pRC -= 110; + tk3 = (uint64_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... 
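+  // Each pass pulls the (c0,c1) constants for the next even round from the
+  // RC table (pRC += 2 skips the interleaved odd-round pair) and applies
+  // PERMUTATION_TK3(): 20 passes cover rounds 2,4,...,40, 28 cover up to 56.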
+#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + +} + +#endif /* ___SKINNY_LOOP */ + +#else /* ___ENABLE_DWORD_CAST */ + +#define PERMUTATION_TK3(c0Val, c1Val) \ + \ + /* permutation */ \ + \ + PERMUTATION() \ + \ + /* LFSR(for TK3) (x7 x6 x5 x4 x3 x2 x1 x0) -> (x0^x6 x7 x6 x5 x4 x3 x2 x1) */ \ + w0 = ((w0 >> 1) & 0x7f7f7f7f) ^ \ + (((w0 << 7) ^ (w0 << 1)) & 0x80808080); \ + w1 = ((w1 >> 1) & 0x7f7f7f7f) ^ \ + (((w1 << 7) ^ (w1 << 1)) & 0x80808080); \ + \ + /* K3^AC(c0 c1) */ \ + /* store */ \ + *tk3++ = w0 ^ c0Val; \ + *tk3++ = w1 ^ ((uint32_t)c1Val << 8); \ + tk3 += 2; + +#ifndef ___SKINNY_LOOP + +void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys) +{ + uint32_t *tk3; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + + // odd + + // load master key + w0 = *(uint32_t*)&roundKeys[32]; + w1 = *(uint32_t*)&roundKeys[36]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[384]; +#else + tk3 = (uint32_t*)&roundKeys[512]; +#endif + + // 1st round + *tk3++ = w0 ^ 0x01; + *tk3++ = w1; + tk3 += 2; + + // 3rd,5th, ... ,37th,39th round + PERMUTATION_TK3(0x7, 0x0); + PERMUTATION_TK3(0xf, 0x1); + PERMUTATION_TK3(0xd, 0x3); + PERMUTATION_TK3(0x7, 0x3); + PERMUTATION_TK3(0xe, 0x1); + PERMUTATION_TK3(0x9, 0x3); + PERMUTATION_TK3(0x7, 0x2); + PERMUTATION_TK3(0xd, 0x1); + PERMUTATION_TK3(0x5, 0x3); + + PERMUTATION_TK3(0x6, 0x1); + PERMUTATION_TK3(0x8, 0x1); + PERMUTATION_TK3(0x1, 0x2); + PERMUTATION_TK3(0x5, 0x0); + PERMUTATION_TK3(0x7, 0x1); + PERMUTATION_TK3(0xc, 0x1); + PERMUTATION_TK3(0x1, 0x3); + PERMUTATION_TK3(0x6, 0x0); + PERMUTATION_TK3(0xb, 0x1); + PERMUTATION_TK3(0xd, 0x2); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41td,43th, ... ,53th,55th round + PERMUTATION_TK3(0x4, 0x3); + PERMUTATION_TK3(0x2, 0x1); + PERMUTATION_TK3(0x8, 0x0); + PERMUTATION_TK3(0x2, 0x2); + PERMUTATION_TK3(0x9, 0x0); + PERMUTATION_TK3(0x6, 0x2); + PERMUTATION_TK3(0x9, 0x1); + PERMUTATION_TK3(0x5, 0x2); + +#endif + + // even + + // load master key + w0 = *(uint32_t*)&roundKeys[40]; + w1 = *(uint32_t*)&roundKeys[44]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[392]; +#else + tk3 = (uint32_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... ,38th,40th round + PERMUTATION_TK3(0x3, 0x0); + PERMUTATION_TK3(0xf, 0x0); + PERMUTATION_TK3(0xe, 0x3); + PERMUTATION_TK3(0xb, 0x3); + PERMUTATION_TK3(0xf, 0x2); + PERMUTATION_TK3(0xc, 0x3); + PERMUTATION_TK3(0x3, 0x3); + PERMUTATION_TK3(0xe, 0x0); + PERMUTATION_TK3(0xa, 0x3); + PERMUTATION_TK3(0xb, 0x2); + + PERMUTATION_TK3(0xc, 0x2); + PERMUTATION_TK3(0x0, 0x3); + PERMUTATION_TK3(0x2, 0x0); + PERMUTATION_TK3(0xb, 0x0); + PERMUTATION_TK3(0xe, 0x2); + PERMUTATION_TK3(0x8, 0x3); + PERMUTATION_TK3(0x3, 0x2); + PERMUTATION_TK3(0xd, 0x0); + PERMUTATION_TK3(0x6, 0x3); + PERMUTATION_TK3(0xa, 0x1); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 42nd,44th, ... 
,54th,56th round + PERMUTATION_TK3(0x9, 0x2); + PERMUTATION_TK3(0x4, 0x2); + PERMUTATION_TK3(0x1, 0x1); + PERMUTATION_TK3(0x4, 0x0); + PERMUTATION_TK3(0x3, 0x1); + PERMUTATION_TK3(0xc, 0x0); + PERMUTATION_TK3(0x2, 0x3); + PERMUTATION_TK3(0xa, 0x0); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys, unsigned char *pRC) +{ + uint32_t *tk3; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t w0; + uint32_t w1; + uint32_t c0; + uint32_t c1; + + // odd + + // load master key + w0 = *(uint32_t*)&roundKeys[32]; + w1 = *(uint32_t*)&roundKeys[36]; + +#ifndef ___NUM_OF_ROUNDS_56 + tk3 = (uint32_t*)&roundKeys[384]; +#else + tk3 = (uint32_t*)&roundKeys[512]; +#endif + + // 1st round + *tk3++ = w0 ^ 0x01; + *tk3++ = w1; + tk3 += 2; + + pRC += 4; + // 3rd,5th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<19;i++) +#else + for(int i=0;i<27;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + + // even + + // load master key + w0 = *(uint32_t*)&roundKeys[40]; + w1 = *(uint32_t*)&roundKeys[44]; + +#ifndef ___NUM_OF_ROUNDS_56 + pRC -= 78; + tk3 = (uint32_t*)&roundKeys[392]; +#else + pRC -= 110; + tk3 = (uint32_t*)&roundKeys[520]; +#endif + + // 2nd,4th, ... +#ifndef ___NUM_OF_ROUNDS_56 + for(int i=0;i<20;i++) +#else + for(int i=0;i<28;i++) +#endif + { + c0 = *pRC++; + c1 = *pRC++; + pRC += 2; + PERMUTATION_TK3(c0, c1); + } + +} + +#endif /* ___SKINNY_LOOP */ + +#endif /* ___ENABLE_DWORD_CAST */ + diff --git a/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/skinny_main.c b/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/skinny_main.c new file mode 100644 index 0000000..8a6e75f --- /dev/null +++ b/romulus/Implementations/crypto_aead/romulusn1/opt32a_NEC/skinny_main.c @@ -0,0 +1,675 @@ +/****************************************************************************** + * Copyright (c) 2020, NEC Corporation. + * + * THIS CODE IS FURNISHED TO YOU "AS IS" WITHOUT WARRANTY OF ANY KIND. 
+ * + *****************************************************************************/ + +/* + * SKINNY-128-384 + * + * ART(TK1) -> store + * load AC(c0 c1) ^ TK3 ^ TK2 + * load TK1 + * calc AC(c0 c1) ^ TK3 ^ TK2 ^ TK1 -> use at (AC->ART) + * SC->SR->(AC->ART)->MC + * + * number of rounds : 40 or 56 + */ + +#include "skinny.h" + +/* + * S-BOX + */ +unsigned char SBOX[] += { + // Original + 0x65, 0x4c, 0x6a, 0x42, 0x4b, 0x63, 0x43, 0x6b, 0x55, 0x75, 0x5a, 0x7a, 0x53, 0x73, 0x5b, 0x7b, + 0x35, 0x8c, 0x3a, 0x81, 0x89, 0x33, 0x80, 0x3b, 0x95, 0x25, 0x98, 0x2a, 0x90, 0x23, 0x99, 0x2b, + 0xe5, 0xcc, 0xe8, 0xc1, 0xc9, 0xe0, 0xc0, 0xe9, 0xd5, 0xf5, 0xd8, 0xf8, 0xd0, 0xf0, 0xd9, 0xf9, + 0xa5, 0x1c, 0xa8, 0x12, 0x1b, 0xa0, 0x13, 0xa9, 0x05, 0xb5, 0x0a, 0xb8, 0x03, 0xb0, 0x0b, 0xb9, + 0x32, 0x88, 0x3c, 0x85, 0x8d, 0x34, 0x84, 0x3d, 0x91, 0x22, 0x9c, 0x2c, 0x94, 0x24, 0x9d, 0x2d, + 0x62, 0x4a, 0x6c, 0x45, 0x4d, 0x64, 0x44, 0x6d, 0x52, 0x72, 0x5c, 0x7c, 0x54, 0x74, 0x5d, 0x7d, + 0xa1, 0x1a, 0xac, 0x15, 0x1d, 0xa4, 0x14, 0xad, 0x02, 0xb1, 0x0c, 0xbc, 0x04, 0xb4, 0x0d, 0xbd, + 0xe1, 0xc8, 0xec, 0xc5, 0xcd, 0xe4, 0xc4, 0xed, 0xd1, 0xf1, 0xdc, 0xfc, 0xd4, 0xf4, 0xdd, 0xfd, + 0x36, 0x8e, 0x38, 0x82, 0x8b, 0x30, 0x83, 0x39, 0x96, 0x26, 0x9a, 0x28, 0x93, 0x20, 0x9b, 0x29, + 0x66, 0x4e, 0x68, 0x41, 0x49, 0x60, 0x40, 0x69, 0x56, 0x76, 0x58, 0x78, 0x50, 0x70, 0x59, 0x79, + 0xa6, 0x1e, 0xaa, 0x11, 0x19, 0xa3, 0x10, 0xab, 0x06, 0xb6, 0x08, 0xba, 0x00, 0xb3, 0x09, 0xbb, + 0xe6, 0xce, 0xea, 0xc2, 0xcb, 0xe3, 0xc3, 0xeb, 0xd6, 0xf6, 0xda, 0xfa, 0xd3, 0xf3, 0xdb, 0xfb, + 0x31, 0x8a, 0x3e, 0x86, 0x8f, 0x37, 0x87, 0x3f, 0x92, 0x21, 0x9e, 0x2e, 0x97, 0x27, 0x9f, 0x2f, + 0x61, 0x48, 0x6e, 0x46, 0x4f, 0x67, 0x47, 0x6f, 0x51, 0x71, 0x5e, 0x7e, 0x57, 0x77, 0x5f, 0x7f, + 0xa2, 0x18, 0xae, 0x16, 0x1f, 0xa7, 0x17, 0xaf, 0x01, 0xb2, 0x0e, 0xbe, 0x07, 0xb7, 0x0f, 0xbf, + 0xe2, 0xca, 0xee, 0xc6, 0xcf, 0xe7, 0xc7, 0xef, 0xd2, 0xf2, 0xde, 0xfe, 0xd7, 0xf7, 0xdf, 0xff, +}; + + /* + * S-BOX ^ AC(c2) + */ +unsigned char SBOX2[] += { // Original ^ c2(0x02) + 0x67, 0x4e, 0x68, 0x40, 0x49, 0x61, 0x41, 0x69, 0x57, 0x77, 0x58, 0x78, 0x51, 0x71, 0x59, 0x79, + 0x37, 0x8e, 0x38, 0x83, 0x8b, 0x31, 0x82, 0x39, 0x97, 0x27, 0x9a, 0x28, 0x92, 0x21, 0x9b, 0x29, + 0xe7, 0xce, 0xea, 0xc3, 0xcb, 0xe2, 0xc2, 0xeb, 0xd7, 0xf7, 0xda, 0xfa, 0xd2, 0xf2, 0xdb, 0xfb, + 0xa7, 0x1e, 0xaa, 0x10, 0x19, 0xa2, 0x11, 0xab, 0x07, 0xb7, 0x08, 0xba, 0x01, 0xb2, 0x09, 0xbb, + 0x30, 0x8a, 0x3e, 0x87, 0x8f, 0x36, 0x86, 0x3f, 0x93, 0x20, 0x9e, 0x2e, 0x96, 0x26, 0x9f, 0x2f, + 0x60, 0x48, 0x6e, 0x47, 0x4f, 0x66, 0x46, 0x6f, 0x50, 0x70, 0x5e, 0x7e, 0x56, 0x76, 0x5f, 0x7f, + 0xa3, 0x18, 0xae, 0x17, 0x1f, 0xa6, 0x16, 0xaf, 0x00, 0xb3, 0x0e, 0xbe, 0x06, 0xb6, 0x0f, 0xbf, + 0xe3, 0xca, 0xee, 0xc7, 0xcf, 0xe6, 0xc6, 0xef, 0xd3, 0xf3, 0xde, 0xfe, 0xd6, 0xf6, 0xdf, 0xff, + 0x34, 0x8c, 0x3a, 0x80, 0x89, 0x32, 0x81, 0x3b, 0x94, 0x24, 0x98, 0x2a, 0x91, 0x22, 0x99, 0x2b, + 0x64, 0x4c, 0x6a, 0x43, 0x4b, 0x62, 0x42, 0x6b, 0x54, 0x74, 0x5a, 0x7a, 0x52, 0x72, 0x5b, 0x7b, + 0xa4, 0x1c, 0xa8, 0x13, 0x1b, 0xa1, 0x12, 0xa9, 0x04, 0xb4, 0x0a, 0xb8, 0x02, 0xb1, 0x0b, 0xb9, + 0xe4, 0xcc, 0xe8, 0xc0, 0xc9, 0xe1, 0xc1, 0xe9, 0xd4, 0xf4, 0xd8, 0xf8, 0xd1, 0xf1, 0xd9, 0xf9, + 0x33, 0x88, 0x3c, 0x84, 0x8d, 0x35, 0x85, 0x3d, 0x90, 0x23, 0x9c, 0x2c, 0x95, 0x25, 0x9d, 0x2d, + 0x63, 0x4a, 0x6c, 0x44, 0x4d, 0x65, 0x45, 0x6d, 0x53, 0x73, 0x5c, 0x7c, 0x55, 0x75, 0x5d, 0x7d, + 0xa0, 0x1a, 0xac, 0x14, 0x1d, 0xa5, 0x15, 0xad, 0x03, 0xb0, 0x0c, 0xbc, 0x05, 0xb5, 0x0d, 0xbd, + 0xe0, 0xc8, 0xec, 0xc4, 0xcd, 0xe5, 
0xc5, 0xed, 0xd0, 0xf0, 0xdc, 0xfc, 0xd5, 0xf5, 0xdd, 0xfd, +}; + +#ifdef ___SKINNY_LOOP +/* + * Round Constants + */ +unsigned char RC[] += { + 0x01, 0x00, 0x03, 0x00, 0x07, 0x00, 0x0f, 0x00, 0x0f, 0x01, 0x0e, 0x03, 0x0d, 0x03, 0x0b, 0x03, + 0x07, 0x03, 0x0f, 0x02, 0x0e, 0x01, 0x0c, 0x03, 0x09, 0x03, 0x03, 0x03, 0x07, 0x02, 0x0e, 0x00, + 0x0d, 0x01, 0x0a, 0x03, 0x05, 0x03, 0x0b, 0x02, 0x06, 0x01, 0x0c, 0x02, 0x08, 0x01, 0x00, 0x03, + 0x01, 0x02, 0x02, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x07, 0x01, 0x0e, 0x02, 0x0c, 0x01, 0x08, 0x03, + 0x01, 0x03, 0x03, 0x02, 0x06, 0x00, 0x0d, 0x00, 0x0b, 0x01, 0x06, 0x03, 0x0d, 0x02, 0x0a, 0x01, +#ifdef ___NUM_OF_ROUNDS_56 + 0x04, 0x03, 0x09, 0x02, 0x02, 0x01, 0x04, 0x02, 0x08, 0x00, 0x01, 0x01, 0x02, 0x02, 0x04, 0x00, + 0x09, 0x00, 0x03, 0x01, 0x06, 0x02, 0x0c, 0x00, 0x09, 0x01, 0x02, 0x03, 0x05, 0x02, 0x0a, 0x00, +#endif + }; +#endif + +extern void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2); +extern void RunEncryptionKeyScheduleTK2(unsigned char *roundKeys); +#ifdef ___SKINNY_LOOP +extern void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys, unsigned char *pRC); +#else +extern void RunEncryptionKeyScheduleTK3(unsigned char *roundKeys); +#endif + +void skinny_128_384_enc123_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K) +{ + uint32_t *pt = (uint32_t*)&pskinny_ctrl->roundKeys[0]; + + pt[0] = *(uint32_t*)(&CNT[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + pt[4] = *(uint32_t*)(&T[0]); + pack_word(T[7], T[4], T[5], T[6], pt[5]); + pt[6] = *(uint32_t*)(&T[8]); + pack_word(T[15], T[12], T[13], T[14], pt[7]); + + pt[8] = *(uint32_t*)(&K[0]); + pack_word(K[7], K[4], K[5], K[6], pt[9]); + pt[10] = *(uint32_t*)(&K[8]); + pack_word(K[15], K[12], K[13], K[14], pt[11]); + +#ifdef ___SKINNY_LOOP + RunEncryptionKeyScheduleTK3(pskinny_ctrl->roundKeys, RC); +#else + RunEncryptionKeyScheduleTK3(pskinny_ctrl->roundKeys); +#endif + RunEncryptionKeyScheduleTK2(pskinny_ctrl->roundKeys); + Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2); + + pskinny_ctrl->func_skinny_128_384_enc = skinny_128_384_enc12_12; + +} + +void skinny_128_384_enc12_12 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K) +{ + (void)K; + + uint32_t *pt = &pskinny_ctrl->roundKeys[0]; + + pt[0] = *(uint32_t*)(&CNT[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + pt[4] = *(uint32_t*)(&T[0]); + pack_word(T[7], T[4], T[5], T[6], pt[5]); + pt[6] = *(uint32_t*)(&T[8]); + pack_word(T[15], T[12], T[13], T[14], pt[7]); + + RunEncryptionKeyScheduleTK2(pskinny_ctrl->roundKeys); + Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2); + +} + +extern void skinny_128_384_enc1_1 (unsigned char* input, skinny_ctrl* pskinny_ctrl, unsigned char* CNT, unsigned char* T, const unsigned char* K) +{ + (void)T; + (void)K; + + uint32_t *pt = &pskinny_ctrl->roundKeys[0]; + + pt[0] = *(uint32_t*)(&CNT[0]); + pack_word(CNT[7], CNT[4], CNT[5], CNT[6], pt[1]); + + Encrypt(input, pskinny_ctrl->roundKeys, SBOX, SBOX2); + +} + +#define PERMUTATION_TK1() \ + \ +/* permutation */ \ +{ \ + unsigned char tmp0 = roundKeys[0]; \ + unsigned char tmp1 = roundKeys[1]; \ + unsigned char tmp2 = roundKeys[2]; \ + unsigned char tmp3 = roundKeys[3]; \ + unsigned char tmp4 = roundKeys[4]; \ + unsigned char tmp5 = roundKeys[5]; \ + unsigned char tmp6 = roundKeys[6]; \ + unsigned char tmp7 = roundKeys[7]; \ + \ + unsigned char* dst = 
&roundKeys[8]; \ + \ + /* 5 7 2 3 6 0 4 1 */ \ + *dst++ = tmp1; \ + *dst++ = tmp4; \ + *dst++ = tmp0; \ + *dst++ = tmp6; \ + *dst++ = tmp3; \ + *dst++ = tmp2; \ + *dst++ = tmp7; \ + *dst++ = tmp5; \ + \ + /* 2 5 0 6 7 1 3 4 */ \ + *dst++ = tmp4; \ + *dst++ = tmp3; \ + *dst++ = tmp1; \ + *dst++ = tmp7; \ + *dst++ = tmp6; \ + *dst++ = tmp0; \ + *dst++ = tmp5; \ + *dst++ = tmp2; \ + \ + /* 0 2 1 7 5 4 6 3 */ \ + *dst++ = tmp3; \ + *dst++ = tmp6; \ + *dst++ = tmp4; \ + *dst++ = tmp5; \ + *dst++ = tmp7; \ + *dst++ = tmp1; \ + *dst++ = tmp2; \ + *dst++ = tmp0; \ + \ + /* 1 0 4 5 2 3 7 6 */ \ + *dst++ = tmp6; \ + *dst++ = tmp7; \ + *dst++ = tmp3; \ + *dst++ = tmp2; \ + *dst++ = tmp5; \ + *dst++ = tmp4; \ + *dst++ = tmp0; \ + *dst++ = tmp1; \ + \ + /* 4 1 3 2 0 6 5 7 */ \ + *dst++ = tmp7; \ + *dst++ = tmp5; \ + *dst++ = tmp6; \ + *dst++ = tmp0; \ + *dst++ = tmp2; \ + *dst++ = tmp3; \ + *dst++ = tmp1; \ + *dst++ = tmp4; \ + \ + /* 3 4 6 0 1 7 2 5 */ \ + *dst++ = tmp5; \ + *dst++ = tmp2; \ + *dst++ = tmp7; \ + *dst++ = tmp1; \ + *dst++ = tmp0; \ + *dst++ = tmp6; \ + *dst++ = tmp4; \ + *dst++ = tmp3; \ + \ + /* 6 3 7 1 4 5 0 2 */ \ + *dst++ = tmp2; \ + *dst++ = tmp0; \ + *dst++ = tmp5; \ + *dst++ = tmp4; \ + *dst++ = tmp1; \ + *dst++ = tmp7; \ + *dst++ = tmp3; \ + *dst++ = tmp6; \ +} + +#define SBOX_0(b0, b1, b2, b3) \ + \ + t0 = sbox[b0]; \ + t1 = sbox[b1]; \ + t2 = sbox[b2]; \ + t3 = sbox[b3]; \ + \ + b0 = (uint8_t)t0; \ + b1 = (uint8_t)t1; \ + b2 = (uint8_t)t2; \ + b3 = (uint8_t)t3; + +#define SBOX_8(b0, b1, b2, b3) \ + \ + t0 = sbox[b0]; \ + t1 = sbox[b1]; \ + t2 = sbox[b2]; \ + t3 = sbox[b3]; \ + \ + b0 = (uint8_t)t3; \ + b1 = (uint8_t)t0; \ + b2 = (uint8_t)t1; \ + b3 = (uint8_t)t2; + +#define SBOX_16(b0, b1, b2, b3) \ + \ + t0 = sbox2[b0]; /* AC(c2) */ \ + t1 = sbox[b1]; \ + t2 = sbox[b2]; \ + t3 = sbox[b3]; \ + \ + b0 = (uint8_t)t2; \ + b1 = (uint8_t)t3; \ + b2 = (uint8_t)t0; \ + b3 = (uint8_t)t1; + +#define SBOX_24(b0, b1, b2, b3) \ + \ + t0 = sbox[b0]; \ + t1 = sbox[b1]; \ + t2 = sbox[b2]; \ + t3 = sbox[b3]; \ + \ + b0 = (uint8_t)t1; \ + b1 = (uint8_t)t2; \ + b2 = (uint8_t)t3; \ + b3 = (uint8_t)t0; + +#ifdef ___ENABLE_DWORD_CAST + +#define SKINNY_MAIN() \ +{ \ + \ + /* odd */ \ + \ + /* LUT(with ShiftRows & AC(c2))*/ \ + \ + SBOX_0( block[0], block[1], block[2], block[3]); \ + SBOX_8( block[4], block[5], block[6], block[7]); \ + SBOX_16(block[8], block[9], block[10], block[11]); \ + SBOX_24(block[12], block[13], block[14], block[15]); \ + \ + /* TK1^TK2^TK3^AC(c0 c1) */ \ + \ + t1 = *(uint64_t*)&block[0]; \ + t1 ^= *tk1++; \ + t1 ^= *tk2++; \ + \ + /* MC */ \ + \ + t2 = *(uint64_t*)&block[8]; \ + t0 = t2 >> 32; \ + \ + /* 0^2 */ \ + t3 = t1 ^ t2; \ + \ + /* 1^2 */ \ + t2 = (t1 >> 32) ^ t2; \ + \ + /* 0^2^3 */ \ + t0 = t0 ^ t3; \ + \ + *(uint32_t*)&block[0] = (uint32_t)t0; \ + *(uint32_t*)&block[4] = (uint32_t)t1; \ + *(uint32_t*)&block[8] = (uint32_t)t2; \ + *(uint32_t*)&block[12] = (uint32_t)t3; \ + \ + /* even */ \ + \ + /* LUT(with ShiftRows & AC(c2))*/ \ + \ + SBOX_0( block[0], block[1], block[2], block[3]); \ + SBOX_8( block[4], block[5], block[6], block[7]); \ + SBOX_16(block[8], block[9], block[10], block[11]); \ + SBOX_24(block[12], block[13], block[14], block[15]); \ + \ + /* TK2^TK3^AC(c0 c1) */ \ + \ + t1 = *(uint64_t*)&block[0]; \ + t1 ^= *tk2++; \ + \ + /* MC */ \ + \ + t2 = *(uint64_t*)&block[8]; \ + t0 = t2 >> 32; \ + \ + /* 0^2 */ \ + t3 = t1 ^ t2; \ + \ + /* 1^2 */ \ + t2 = (t1 >> 32) ^ t2; \ + \ + /* 0^2^3 */ \ + t0 = t0 ^ t3; \ + \ + *(uint32_t*)&block[0] = (uint32_t)t0; \ 
+ *(uint32_t*)&block[4] = (uint32_t)t1; \ + *(uint32_t*)&block[8] = (uint32_t)t2; \ + *(uint32_t*)&block[12] = (uint32_t)t3; \ +} + +#ifndef ___SKINNY_LOOP + +void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint64_t *tk1; + uint64_t *tk2; + uint64_t t0; // used in MACRO + uint64_t t1; // used in MACRO + uint64_t t2; // used in MACRO + uint64_t t3; // used in MACRO + +// TK1 + + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + + tk2 = (uint64_t*)&roundKeys[64]; + tk1 = (uint64_t*)&roundKeys[0]; + + // 1st, ...,16th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint64_t*)&roundKeys[0]; + + // 17th, ...,32nd round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint64_t*)&roundKeys[0]; + + // 33rd, ...,40th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41st, ...,48th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint64_t*)&roundKeys[0]; + + // 49th, ... ,56th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint64_t *tk1; + uint64_t *tk2; + uint64_t t0; // used in MACRO + uint64_t t1; // used in MACRO + uint64_t t2; // used in MACRO + uint64_t t3; // used in MACRO + +// TK1 + + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + + tk2 = (uint64_t*)&roundKeys[64]; + + // 1st, ... ,32nd or 48th round +#ifndef ___NUM_OF_ROUNDS_56 + for(int j=0;j<2;j++) +#else + for(int j=0;j<3;j++) +#endif + { + tk1 = (uint64_t*)&roundKeys[0]; + for(int i=0;i<8;i++) + { + SKINNY_MAIN(); + } + } + + // 33rd, ... ,40th or 49th, ... 
,56th round + { + tk1 = (uint64_t*)&roundKeys[0]; + for(int i=0;i<4;i++) + { + SKINNY_MAIN(); + } + } +} + +#endif /* ___SKINNY_LOOP */ + +#else /* ___ENABLE_DWORD_CAST */ + +#define SKINNY_MAIN() \ +{ \ + \ + /* odd */ \ + \ + /* LUT(with ShiftRows & AC(c2))*/ \ + \ + SBOX_0( block[0], block[1], block[2], block[3]); \ + SBOX_8( block[4], block[5], block[6], block[7]); \ + SBOX_16(block[8], block[9], block[10], block[11]); \ + SBOX_24(block[12], block[13], block[14], block[15]); \ + \ + /* TK1^TK2^TK3^AC(c0 c1) */ \ + \ + t1 = *(uint32_t*)&block[0]; \ + t0 = *(uint32_t*)&block[4]; \ + t1 ^= *tk1++; \ + t1 ^= *tk2++; \ + t0 ^= *tk1++; \ + t0 ^= *tk2++; \ + \ + /* MC */ \ + \ + t2 = *(uint32_t*)&block[8]; \ + t4 = *(uint32_t*)&block[12]; \ + \ + /* 0^2 */ \ + t3 = t1 ^ t2; \ + \ + /* 1^2 */ \ + t2 = t0 ^ t2; \ + \ + /* 0^2^3 */ \ + t0 = t3 ^ t4; \ + \ + *(uint32_t*)&block[0] = t0; \ + *(uint32_t*)&block[4] = t1; \ + *(uint32_t*)&block[8] = t2; \ + *(uint32_t*)&block[12] = t3; \ + \ + /* even */ \ + \ + /* LUT(with ShiftRows & AC(c2))*/ \ + \ + SBOX_0( block[0], block[1], block[2], block[3]); \ + SBOX_8( block[4], block[5], block[6], block[7]); \ + SBOX_16(block[8], block[9], block[10], block[11]); \ + SBOX_24(block[12], block[13], block[14], block[15]); \ + \ + /* TK2^TK3^AC(c0 c1) */ \ + \ + t1 = *(uint32_t*)&block[0]; \ + t0 = *(uint32_t*)&block[4]; \ + t1 ^= *tk2++; \ + t0 ^= *tk2++; \ + \ + /* MC */ \ + \ + t2 = *(uint32_t*)&block[8]; \ + t4 = *(uint32_t*)&block[12]; \ + \ + /* 0^2 */ \ + t3 = t1 ^ t2; \ + \ + /* 1^2 */ \ + t2 = t0 ^ t2; \ + \ + /* 0^2^3 */ \ + t0 = t3 ^ t4; \ + \ + *(uint32_t*)&block[0] = t0; \ + *(uint32_t*)&block[4] = t1; \ + *(uint32_t*)&block[8] = t2; \ + *(uint32_t*)&block[12] = t3; \ +} + +#ifndef ___SKINNY_LOOP + +void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint32_t *tk1; + uint32_t *tk2; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t t3; // used in MACRO + uint32_t t4; // used in MACRO + +// TK1 + + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + + tk2 = (uint32_t*)&roundKeys[64]; + tk1 = (uint32_t*)&roundKeys[0]; + + // 1st, ...,16th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint32_t*)&roundKeys[0]; + + // 17th, ...,32nd round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint32_t*)&roundKeys[0]; + + // 33rd, ...,40th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#ifdef ___NUM_OF_ROUNDS_56 + + // 41st, ...,48th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + + tk1 = (uint32_t*)&roundKeys[0]; + + // 49th, ... ,56th round + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + SKINNY_MAIN(); + +#endif + +} + +#else /* ___SKINNY_LOOP */ + +void Encrypt(unsigned char *block, unsigned char *roundKeys, unsigned char *sbox, unsigned char *sbox2) +{ + uint32_t *tk1; + uint32_t *tk2; + uint32_t t0; // used in MACRO + uint32_t t1; // used in MACRO + uint32_t t2; // used in MACRO + uint32_t t3; // used in MACRO + uint32_t t4; // used in MACRO + +// TK1 + + PERMUTATION_TK1(); + +// SB+AC+ShR+MC + + tk2 = (uint32_t*)&roundKeys[64]; + + // 1st, ... 
,32nd or 48th round +#ifndef ___NUM_OF_ROUNDS_56 + for(int j=0;j<2;j++) +#else + for(int j=0;j<3;j++) +#endif + { + tk1 = (uint32_t*)&roundKeys[0]; + for(int i=0;i<8;i++) + { + SKINNY_MAIN(); + } + } + + // 33rd, ... ,40th or 49th, ... ,56th round + { + tk1 = (uint32_t*)&roundKeys[0]; + for(int i=0;i<4;i++) + { + SKINNY_MAIN(); + } + } +} + +#endif /* ___SKINNY_LOOP */ + +#endif /* ___ENABLE_DWORD_CAST */ +
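For orientation, the calling convention that can be read off the two Encrypt variants above is: block is the 16-byte cipher state, encrypted in place; roundKeys carries the TK1-related schedule in its first 64 bytes (tk1 is rewound to &roundKeys[0] every 16 rounds), followed from offset 64 by the precomputed TK2/TK3 (and round-constant) words that tk2 consumes at 8 bytes per round; sbox and sbox2 are 256-entry byte lookup tables. The fragment below is only a minimal, hypothetical call-site sketch under those assumptions: example_encrypt_block and prepare_round_keys_and_sboxes are illustrative names, the latter standing in for the key-schedule and table-setup code that lives elsewhere in the submission and is not shown in this diff.

#include <stdint.h>

/* Prototype of the function defined above; in the submission it would come
 * from the implementation's own header. */
void Encrypt(unsigned char *block, unsigned char *roundKeys,
             unsigned char *sbox, unsigned char *sbox2);

/* Hypothetical placeholder: the real round-key buffer and the two 256-entry
 * S-box tables are produced by the key-schedule / table-setup code, which is
 * not part of this diff. */
void prepare_round_keys_and_sboxes(unsigned char *roundKeys,
                                   unsigned char *sbox, unsigned char *sbox2,
                                   const unsigned char *key);

void example_encrypt_block(unsigned char block[16], const unsigned char *key)
{
  /* 64 bytes of TK1 schedule plus 8 bytes of TK2/TK3 words per round; sized
   * for the 56-round build so it also covers 40 rounds.  Backed by uint64_t
   * so the word-sized casts inside Encrypt see an aligned buffer. */
  uint64_t roundKeys64[(64 + 56 * 8) / 8];
  unsigned char *roundKeys = (unsigned char *)roundKeys64;
  unsigned char sbox[256], sbox2[256];

  prepare_round_keys_and_sboxes(roundKeys, sbox, sbox2, key);

  /* Encrypts the 16-byte state in place. */
  Encrypt(block, roundKeys, sbox, sbox2);
}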